diff --git a/tutorial1/rl-taxi-v3-using-q-learning.ipynb b/tutorial1/rl-taxi-v3-using-q-learning.ipynb new file mode 100644 index 0000000..376f301 --- /dev/null +++ b/tutorial1/rl-taxi-v3-using-q-learning.ipynb @@ -0,0 +1,1598 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "execution_count": null, + "metadata": { + "papermill": { + "duration": 0.005327, + "end_time": "2020-08-25T14:57:31.773342", + "exception": false, + "start_time": "2020-08-25T14:57:31.768015", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Taxi-v3 using Q-Learning\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "There are 4 locations (labeled by different letters) and your job is to pick up the passenger at one location and drop him off in another. You receive +20 points for a successful dropoff, and lose 1 point for every timestep it takes. There is also a 10 point penalty for illegal pick-up and drop-off actions." + ] + }, + { + "cell_type": "markdown", + "execution_count": null, + "metadata": { + "papermill": { + "duration": 0.003496, + "end_time": "2020-08-25T14:57:31.781034", + "exception": false, + "start_time": "2020-08-25T14:57:31.777538", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### 0. 
Necessary Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", + "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", + "execution": { + "iopub.execute_input": "2020-08-25T14:57:31.794019Z", + "iopub.status.busy": "2020-08-25T14:57:31.793384Z", + "iopub.status.idle": "2020-08-25T14:57:31.852938Z", + "shell.execute_reply": "2020-08-25T14:57:31.852320Z" + }, + "papermill": { + "duration": 0.068197, + "end_time": "2020-08-25T14:57:31.853090", + "exception": false, + "start_time": "2020-08-25T14:57:31.784893", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import gym\n", + "import random\n", + "\n", + "from tqdm import tqdm" + ] + }, + { + "cell_type": "markdown", + "execution_count": null, + "metadata": { + "papermill": { + "duration": 0.003555, + "end_time": "2020-08-25T14:57:31.861009", + "exception": false, + "start_time": "2020-08-25T14:57:31.857454", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### 1. 
Pre-Setup" + ] + }, + { + "cell_type": "markdown", + "execution_count": null, + "metadata": { + "papermill": { + "duration": 0.003625, + "end_time": "2020-08-25T14:57:31.868368", + "exception": false, + "start_time": "2020-08-25T14:57:31.864743", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Create the Environment**" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-25T14:57:31.880366Z", + "iopub.status.busy": "2020-08-25T14:57:31.879762Z", + "iopub.status.idle": "2020-08-25T14:57:33.095674Z", + "shell.execute_reply": "2020-08-25T14:57:33.096470Z" + }, + "papermill": { + "duration": 1.224504, + "end_time": "2020-08-25T14:57:33.096663", + "exception": false, + "start_time": "2020-08-25T14:57:31.872159", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---------+\n", + "|R: |\u001b[43m \u001b[0m: :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[34;1mY\u001b[0m| : |B: |\n", + "+---------+\n", + "\n" + ] + } + ], + "source": [ + "env = gym.make('Taxi-v3')\n", + "env.render()" + ] + }, + { + "cell_type": "markdown", + "execution_count": null, + "metadata": { + "papermill": { + "duration": 0.003726, + "end_time": "2020-08-25T14:57:33.105305", + "exception": false, + "start_time": "2020-08-25T14:57:33.101579", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Create the Q-Table**\n", + "\n", + "The Q-table is an NxM table where N corresponds to the number of states and M corresponds to the number of actions that the agent can take." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-25T14:57:33.119712Z", + "iopub.status.busy": "2020-08-25T14:57:33.119122Z", + "iopub.status.idle": "2020-08-25T14:57:33.122348Z", + "shell.execute_reply": "2020-08-25T14:57:33.122834Z" + }, + "papermill": { + "duration": 0.01239, + "end_time": "2020-08-25T14:57:33.122980", + "exception": false, + "start_time": "2020-08-25T14:57:33.110590", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total possible actions: 6\n", + "Total states: 500\n" + ] + } + ], + "source": [ + "action_size = env.action_space.n\n", + "print(f\"Total possible actions: {action_size}\")\n", + "\n", + "state_size = env.observation_space.n\n", + "print(f\"Total states: {state_size}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-25T14:57:33.136502Z", + "iopub.status.busy": "2020-08-25T14:57:33.135665Z", + "iopub.status.idle": "2020-08-25T14:57:33.139228Z", + "shell.execute_reply": "2020-08-25T14:57:33.138593Z" + }, + "papermill": { + "duration": 0.012441, + "end_time": "2020-08-25T14:57:33.139335", + "exception": false, + "start_time": "2020-08-25T14:57:33.126894", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0.]\n", + " ...\n", + " [0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 
0.]]\n" + ] + } + ], + "source": [ + "qtable = np.zeros((state_size,action_size))\n", + "print(qtable)" + ] + }, + { + "cell_type": "markdown", + "execution_count": null, + "metadata": { + "papermill": { + "duration": 0.003682, + "end_time": "2020-08-25T14:57:33.147245", + "exception": false, + "start_time": "2020-08-25T14:57:33.143563", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Hyperparameters**" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-25T14:57:33.160242Z", + "iopub.status.busy": "2020-08-25T14:57:33.159640Z", + "iopub.status.idle": "2020-08-25T14:57:33.162452Z", + "shell.execute_reply": "2020-08-25T14:57:33.161848Z" + }, + "papermill": { + "duration": 0.011405, + "end_time": "2020-08-25T14:57:33.162551", + "exception": false, + "start_time": "2020-08-25T14:57:33.151146", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "total_episodes = 50000 # Total episodes to train the agent for\n", + "total_test_episodes = 10 # Number of test episodes\n", + "max_steps = 99 # Terminate if the agent takes more than 99 steps\n", + "\n", + "alpha = 0.7 # Learning rate\n", + "gamma = 0.618 # Discounting rate for rewards\n", + "\n", + "# parameters for maintaining trade-off between exploration-exploitation\n", + "epsilon = 1.0 # Exploration rate\n", + "max_epsilon = 1.0 # Exploration probability at the start\n", + "min_epsilon = 0.01 # Minimum exploration probability\n", + "decay_rate = 0.01 # rate at which epsilon shrinks " + ] + }, + { + "cell_type": "markdown", + "execution_count": null, + "metadata": { + "papermill": { + "duration": 0.003746, + "end_time": "2020-08-25T14:57:33.170560", + "exception": false, + "start_time": "2020-08-25T14:57:33.166814", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### 2. 
Q-Learning Algorithm\n", + "\n", + "![](https://leimao.github.io/images/blog/2019-03-14-RL-On-Policy-VS-Off-Policy/q-learning.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-25T14:57:33.187463Z", + "iopub.status.busy": "2020-08-25T14:57:33.186851Z", + "iopub.status.idle": "2020-08-25T14:57:52.341020Z", + "shell.execute_reply": "2020-08-25T14:57:52.340241Z" + }, + "papermill": { + "duration": 19.166629, + "end_time": "2020-08-25T14:57:52.341197", + "exception": false, + "start_time": "2020-08-25T14:57:33.174568", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 50000/50000 [00:19<00:00, 2612.08it/s]\n" + ] + } + ], + "source": [ + "# iterate over every episode\n", + "for episode in tqdm(range(total_episodes)):\n", + " \n", + " # Reset the environment at every episode\n", + " state = env.reset()\n", + " # flag to check if episode is terminated or not\n", + " done = False\n", + " \n", + " # iterate over all steps that the agent can take in an episode\n", + " for step in range(max_steps): \n", + " \n", + " # Select an action based on the epsilon-greedy policy\n", + " \n", + " # draw a uniform random number between 0 and 1; comparing it with epsilon decides between exploring and exploiting\n", + " one_minus_epsilon = random.uniform(0,1)\n", + " \n", + " # if one_minus_epsilon is greater than epsilon then exploit \n", + " if one_minus_epsilon > epsilon:\n", + " action = np.argmax(qtable[state,:])\n", + " # else explore by selecting an action randomly\n", + " else:\n", + " action = env.action_space.sample()\n", + " \n", + " # Take this action to reach the next state and get a reward \n", + " new_state, reward, done, info = env.step(action)\n", + " \n", + " # update the Q-Table based on the formula given in the algorithm\n", + " qtable[state,action] = qtable[state,action] + alpha*(reward + gamma*np.max(qtable[new_state,:]) - qtable[state,action])\n", + " \n", + " # update 
the current state\n", + " state = new_state\n", + " \n", + " # if the agent has reached termination state then break\n", + " if done:\n", + " break\n", + " \n", + " # epsilon decay to maintain trade-off between exploration-exploitation\n", + " epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)" + ] + }, + { + "cell_type": "markdown", + "execution_count": null, + "metadata": { + "papermill": { + "duration": 0.0119, + "end_time": "2020-08-25T14:57:52.367006", + "exception": false, + "start_time": "2020-08-25T14:57:52.355106", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### 3. Test the agent" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-25T14:57:52.401923Z", + "iopub.status.busy": "2020-08-25T14:57:52.400945Z", + "iopub.status.idle": "2020-08-25T14:57:52.430227Z", + "shell.execute_reply": "2020-08-25T14:57:52.426409Z" + }, + "papermill": { + "duration": 0.051592, + "end_time": "2020-08-25T14:57:52.430443", + "exception": false, + "start_time": "2020-08-25T14:57:52.378851", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "********************************************************************************\n", + "Episode 1:\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | :\u001b[43m \u001b[0m| : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + "\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + 
"|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| :\u001b[43m \u001b[0m| : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "|\u001b[43m \u001b[0m: | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1m\u001b[43mR\u001b[0m\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :G|\n", + "|\u001b[42m_\u001b[0m: | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "|\u001b[42m_\u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35m\u001b[42mY\u001b[0m\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "Score: 10\n", + "********************************************************************************\n", + "Episode 2:\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + "\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| | : | : |\n", + 
"|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "| : : :\u001b[43m \u001b[0m: |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : :\u001b[43m \u001b[0m|\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : :\u001b[43m \u001b[0m|\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :\u001b[34;1m\u001b[43mG\u001b[0m\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :\u001b[42mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : :\u001b[42m_\u001b[0m|\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : :\u001b[42m_\u001b[0m|\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : :\u001b[42m_\u001b[0m: |\n", + "| | : | : |\n", + 
"|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "|\u001b[42m_\u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35m\u001b[42mY\u001b[0m\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "Score: 4\n", + "********************************************************************************\n", + "Episode 3:\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m:\u001b[43m \u001b[0m| : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[34;1mY\u001b[0m| : |B: |\n", + "+---------+\n", + "\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| :\u001b[43m \u001b[0m| : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[34;1mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[34;1mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[34;1mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : 
:G|\n", + "| : | : : |\n", + "| : : : : |\n", + "|\u001b[43m \u001b[0m| : | : |\n", + "|\u001b[34;1mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[34;1m\u001b[43mY\u001b[0m\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[42mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "|\u001b[42m_\u001b[0m| : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "|\u001b[42m_\u001b[0m: | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[35m\u001b[42mR\u001b[0m\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "Score: 10\n", + "********************************************************************************\n", + "Episode 4:\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | :\u001b[43m \u001b[0m:G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + "\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : | :\u001b[43m \u001b[0m: |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : :\u001b[43m \u001b[0m: |\n", + "| | : | : |\n", + "|Y| : 
|\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "|\u001b[43m \u001b[0m: : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :G|\n", + "|\u001b[43m \u001b[0m: | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1m\u001b[43mR\u001b[0m\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :G|\n", + "|\u001b[42m_\u001b[0m: | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| :\u001b[42m_\u001b[0m| : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: 
|\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : :\u001b[42m_\u001b[0m: |\n", + "| | : | : |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : |\u001b[42m_\u001b[0m: |\n", + "|Y| : |\u001b[35mB\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[35m\u001b[42mB\u001b[0m\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "Score: 5\n", + "********************************************************************************\n", + "Episode 5:\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |\u001b[34;1m\u001b[43mB\u001b[0m\u001b[0m: |\n", + "+---------+\n", + "\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |\u001b[42mB\u001b[0m: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : |\u001b[42m_\u001b[0m: |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : :\u001b[42m_\u001b[0m: |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : 
|B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "|\u001b[42m_\u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35m\u001b[42mY\u001b[0m\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "Score: 12\n", + "********************************************************************************\n", + "Episode 6:\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m|\u001b[43m \u001b[0m: |B: |\n", + "+---------+\n", + "\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| |\u001b[43m \u001b[0m: | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "| : : :\u001b[43m \u001b[0m: |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : :\u001b[43m \u001b[0m|\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[34;1mG\u001b[0m|\n", + "| : | : :\u001b[43m \u001b[0m|\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + 
"+---------+\n", + "|R: | : :\u001b[34;1m\u001b[43mG\u001b[0m\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :\u001b[42mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : :\u001b[42m_\u001b[0m|\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : :\u001b[42m_\u001b[0m|\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : :\u001b[42m_\u001b[0m: |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| | : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "|\u001b[42m_\u001b[0m| : | : |\n", + "|\u001b[35mY\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|\u001b[35m\u001b[42mY\u001b[0m\u001b[0m| : |B: |\n", + "+---------+\n", + " (South)\n", + "Score: 4\n", + "********************************************************************************\n", + 
"Episode 7:\n", + "+---------+\n", + "|\u001b[34;1m\u001b[43mR\u001b[0m\u001b[0m: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + "\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "|\u001b[42m_\u001b[0m: | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : :\u001b[42m_\u001b[0m: |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : :\u001b[42m_\u001b[0m|\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : :\u001b[42m_\u001b[0m|\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :\u001b[35m\u001b[42mG\u001b[0m\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "Score: 11\n", + 
"********************************************************************************\n", + "Episode 8:\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[34;1mB\u001b[0m: |\n", + "+---------+\n", + "\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|Y| : |\u001b[34;1mB\u001b[0m: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : :\u001b[43m \u001b[0m: |\n", + "| | : | : |\n", + "|Y| : |\u001b[34;1mB\u001b[0m: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : |\u001b[43m \u001b[0m: |\n", + "|Y| : |\u001b[34;1mB\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[34;1m\u001b[43mB\u001b[0m\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[42mB\u001b[0m: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B:\u001b[42m_\u001b[0m|\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | :\u001b[42m_\u001b[0m|\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : :\u001b[42m_\u001b[0m|\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : :\u001b[42m_\u001b[0m|\n", + "| : : : : |\n", + 
"| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :\u001b[35m\u001b[42mG\u001b[0m\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "Score: 10\n", + "********************************************************************************\n", + "Episode 9:\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: |\u001b[43m \u001b[0m: :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[34;1mB\u001b[0m: |\n", + "+---------+\n", + "\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | :\u001b[43m \u001b[0m:G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[34;1mB\u001b[0m: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | :\u001b[43m \u001b[0m: |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[34;1mB\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : :\u001b[43m \u001b[0m: |\n", + "| | : | : |\n", + "|Y| : |\u001b[34;1mB\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : |\u001b[43m \u001b[0m: |\n", + "|Y| : |\u001b[34;1mB\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[34;1m\u001b[43mB\u001b[0m\u001b[0m: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |\u001b[42mB\u001b[0m: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : |\u001b[42m_\u001b[0m: |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + 
"+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : :\u001b[42m_\u001b[0m: |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "| : | : : |\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[35mR\u001b[0m: | : :G|\n", + "|\u001b[42m_\u001b[0m: | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[35m\u001b[42mR\u001b[0m\u001b[0m: | : :G|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "Score: 7\n", + "********************************************************************************\n", + "Episode 10:\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :\u001b[35m\u001b[43mG\u001b[0m\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + "\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | :\u001b[43m \u001b[0m:\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :\u001b[35mG\u001b[0m|\n", + "| : | :\u001b[43m \u001b[0m: |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : :\u001b[43m \u001b[0m: |\n", + "| | : 
| : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : :\u001b[43m \u001b[0m: : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| :\u001b[43m \u001b[0m: : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :\u001b[35mG\u001b[0m|\n", + "| :\u001b[43m \u001b[0m| : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[34;1mR\u001b[0m: | : :\u001b[35mG\u001b[0m|\n", + "|\u001b[43m \u001b[0m: | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (West)\n", + "+---------+\n", + "|\u001b[34;1m\u001b[43mR\u001b[0m\u001b[0m: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|\u001b[42mR\u001b[0m: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (Pickup)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "|\u001b[42m_\u001b[0m: | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "|\u001b[42m_\u001b[0m: : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (South)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| :\u001b[42m_\u001b[0m: : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : :\u001b[42m_\u001b[0m: : |\n", + "| | : | : |\n", + "|Y| : |B: 
|\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : :\u001b[42m_\u001b[0m: |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : : |\n", + "| : : : :\u001b[42m_\u001b[0m|\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (East)\n", + "+---------+\n", + "|R: | : :\u001b[35mG\u001b[0m|\n", + "| : | : :\u001b[42m_\u001b[0m|\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "+---------+\n", + "|R: | : :\u001b[35m\u001b[42mG\u001b[0m\u001b[0m|\n", + "| : | : : |\n", + "| : : : : |\n", + "| | : | : |\n", + "|Y| : |B: |\n", + "+---------+\n", + " (North)\n", + "Score: 3\n", + "Average Rewards: 7.6\n" + ] + } + ], + "source": [ + "# Evaluate the greedy policy from the Q-table: one total reward per test episode\n", + "rewards = []\n", + "\n", + "for episode in range(total_test_episodes):\n", + "\n", + "    state = env.reset()\n", + "    done = False\n", + "    total_rewards = 0\n", + "    print(f\"{'*'*80}\")\n", + "    print(f\"Episode {episode + 1}:\")\n", + "\n", + "    for step in range(max_steps):\n", + "\n", + "        # render the state the agent is about to act from\n", + "        env.render()\n", + "\n", + "        # act greedily: pick the action with the highest Q-value for this state\n", + "        action = np.argmax(qtable[state,:])\n", + "\n", + "        new_state, reward, done, info = env.step(action)\n", + "\n", + "        total_rewards += reward\n", + "\n", + "        if done:\n", + "            break\n", + "\n", + "        state = new_state\n", + "\n", + "    # Record the score for every episode, including ones cut off at max_steps;\n", + "    # otherwise unfinished episodes would silently count as 0 in the average.\n", + "    rewards.append(total_rewards)\n", + "    if not done:\n", + "        print(f\"Episode stopped after {max_steps} steps without finishing\")\n", + "    print(f\"Score: {total_rewards}\")\n", + "\n", + "env.close()\n", + "# average over the episodes actually recorded\n", + "print(f\"Average Rewards: {sum(rewards)/len(rewards)}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3
+ }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "papermill": { + "duration": 25.17346, + "end_time": "2020-08-25T14:57:52.552402", + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2020-08-25T14:57:27.378942", + "version": "2.1.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}