diff --git a/TicTacToe/policy_p1 b/TicTacToe/policy_p1
index 72fbaba..6ccbd1d 100644
Binary files a/TicTacToe/policy_p1 and b/TicTacToe/policy_p1 differ
diff --git a/TicTacToe/policy_p2 b/TicTacToe/policy_p2
index 9447c6e..ad5e7d7 100644
Binary files a/TicTacToe/policy_p2 and b/TicTacToe/policy_p2 differ
diff --git a/TicTacToe/tic-tac-toe.ipynb b/TicTacToe/tic-tac-toe.ipynb
index f2a6277..fd3c8da 100644
--- a/TicTacToe/tic-tac-toe.ipynb
+++ b/TicTacToe/tic-tac-toe.ipynb
@@ -13,7 +13,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 54,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -23,12 +23,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 55,
    "metadata": {},
    "outputs": [],
    "source": [
     "BOARD_ROWS = 3\n",
-    "BOARD_COLS = 3"
+    "BOARD_COLS = 3\n",
+    "train = False"
    ]
   },
   {
@@ -44,7 +45,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 56,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -83,11 +84,10 @@
     "        # diagonal\n",
     "        diag_sum1 = sum([self.board[i, i] for i in range(BOARD_COLS)])\n",
     "        diag_sum2 = sum([self.board[i, BOARD_COLS-i-1] for i in range(BOARD_COLS)])\n",
-    "        diag_sum = max(diag_sum1, diag_sum2)\n",
-    "        if diag_sum == 3:\n",
+    "        if diag_sum1 == 3 or diag_sum2 == 3:\n",
     "            self.isEnd = True\n",
     "            return 1\n",
-    "        if diag_sum == -3:\n",
+    "        if diag_sum1 == -3 or diag_sum2 == -3:\n",
     "            self.isEnd = True\n",
     "            return -1\n",
     "        \n",
@@ -222,7 +222,10 @@
     "                if self.board[i, j] == -1:\n",
     "                    token = 'o'\n",
     "                if self.board[i, j] == 0:\n",
-    "                    token = ' '\n",
+    "                    token = str(j+1 + i*3)\n",
+    "\n",
+    "\n",
+    "\n",
     "                out += token + ' | '\n",
     "            print(out)\n",
     "        print('-------------')    "
@@ -230,7 +233,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 57,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -294,7 +297,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 58,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -302,13 +305,25 @@
     "    def __init__(self, name):\n",
     "        self.name = name \n",
     "    \n",
+    "    def cell_to_row_col(self, cell_number):\n",
+    "        \"\"\"Map a 1-based, row-major cell number (1-9) to a 0-based (row, col).\"\"\"\n",
+    "        if not 1 <= cell_number <= 9:\n",
+    "            raise ValueError(\"Cell number must be between 1 and 9\")\n",
+    "        # divmod on the 0-based index yields the row and column in one step.\n",
+    "        row_idx, col_idx = divmod(cell_number - 1, 3)\n",
+    "        return row_idx, col_idx\n",
+    "\n",
+    "    \n",
     "    def chooseAction(self, positions):\n",
     "        while True:\n",
-    "            row = int(input(\"Input your action row:\"))\n",
-    "            col = int(input(\"Input your action col:\"))\n",
+    "            cell = int(input(\"Input your action:\"))\n",
+    "            row,col = self.cell_to_row_col(cell)\n",
     "            action = (row, col)\n",
     "            if action in positions:\n",
     "                return action\n",
+    "            \n",
+    "\n",
+    "\n",
     "    \n",
     "    # append a hash state\n",
     "    def addState(self, state):\n",
@@ -331,91 +346,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 59,
    "metadata": {
     "scrolled": true
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "training...\n",
-      "Rounds 0\n",
-      "Rounds 1000\n",
-      "Rounds 2000\n",
-      "Rounds 3000\n",
-      "Rounds 4000\n",
-      "Rounds 5000\n",
-      "Rounds 6000\n",
-      "Rounds 7000\n",
-      "Rounds 8000\n",
-      "Rounds 9000\n",
-      "Rounds 10000\n",
-      "Rounds 11000\n",
-      "Rounds 12000\n",
-      "Rounds 13000\n",
-      "Rounds 14000\n",
-      "Rounds 15000\n",
-      "Rounds 16000\n",
-      "Rounds 17000\n",
-      "Rounds 18000\n",
-      "Rounds 19000\n",
-      "Rounds 20000\n",
-      "Rounds 21000\n",
-      "Rounds 22000\n",
-      "Rounds 23000\n",
-      "Rounds 24000\n",
-      "Rounds 25000\n",
-      "Rounds 26000\n",
-      "Rounds 27000\n",
-      "Rounds 28000\n",
-      "Rounds 29000\n",
-      "Rounds 30000\n",
-      "Rounds 31000\n",
-      "Rounds 32000\n",
-      "Rounds 33000\n",
-      "Rounds 34000\n",
-      "Rounds 35000\n",
-      "Rounds 36000\n",
-      "Rounds 37000\n",
-      "Rounds 38000\n",
-      "Rounds 39000\n",
-      "Rounds 40000\n",
-      "Rounds 41000\n",
-      "Rounds 42000\n",
-      "Rounds 43000\n",
-      "Rounds 44000\n",
-      "Rounds 45000\n",
-      "Rounds 46000\n",
-      "Rounds 47000\n",
-      "Rounds 48000\n",
-      "Rounds 49000\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "p1 = Player(\"p1\")\n",
     "p2 = Player(\"p2\")\n",
-    "\n",
     "st = State(p1, p2)\n",
-    "print(\"training...\")\n",
-    "st.play(50000)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "p1.savePolicy()\n",
-    "p2.savePolicy()"
+    "\n",
+    "# To re-train the agents, set `train` (defined in the cell with BOARD_ROWS and BOARD_COLS) to True.\n",
+    "# Training takes approximately 3 minutes.\n",
+    "if train:\n",
+    "    print(\"training...\")\n",
+    "    st.play(50000)\n",
+    "    p1.savePolicy()\n",
+    "    p2.savePolicy()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 60,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -431,7 +383,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 63,
    "metadata": {},
    "outputs": [
     {
@@ -439,141 +391,62 @@
      "output_type": "stream",
      "text": [
       "-------------\n",
-      "|   |   |   | \n",
-      "-------------\n",
-      "|   | x |   | \n",
+      "| x | 2 | 3 | \n",
       "-------------\n",
-      "|   |   |   | \n",
+      "| 4 | 5 | 6 | \n",
       "-------------\n",
-      "Input your action row:2\n",
-      "Input your action col:2\n",
+      "| 7 | 8 | 9 | \n",
       "-------------\n",
-      "|   |   |   | \n",
       "-------------\n",
-      "|   | x |   | \n",
+      "| x | 2 | 3 | \n",
       "-------------\n",
-      "|   |   | o | \n",
+      "| 4 | o | 6 | \n",
       "-------------\n",
+      "| 7 | 8 | 9 | \n",
       "-------------\n",
-      "|   |   |   | \n",
       "-------------\n",
-      "|   | x |   | \n",
+      "| x | 2 | 3 | \n",
       "-------------\n",
-      "|   | x | o | \n",
+      "| 4 | o | 6 | \n",
       "-------------\n",
-      "Input your action row:0\n",
-      "Input your action col:1\n",
+      "| 7 | 8 | x | \n",
       "-------------\n",
-      "|   | o |   | \n",
       "-------------\n",
-      "|   | x |   | \n",
+      "| x | 2 | 3 | \n",
       "-------------\n",
-      "|   | x | o | \n",
+      "| 4 | o | 6 | \n",
       "-------------\n",
+      "| 7 | o | x | \n",
       "-------------\n",
-      "|   | o | x | \n",
       "-------------\n",
-      "|   | x |   | \n",
+      "| x | x | 3 | \n",
       "-------------\n",
-      "|   | x | o | \n",
+      "| 4 | o | 6 | \n",
       "-------------\n",
-      "Input your action row:1\n",
-      "Input your action col:1\n",
-      "Input your action row:1\n",
-      "Input your action col:0\n",
+      "| 7 | o | x | \n",
       "-------------\n",
-      "|   | o | x | \n",
-      "-------------\n",
-      "| o | x |   | \n",
-      "-------------\n",
-      "|   | x | o | \n",
-      "-------------\n",
-      "-------------\n",
-      "|   | o | x | \n",
-      "-------------\n",
-      "| o | x |   | \n",
       "-------------\n",
       "| x | x | o | \n",
       "-------------\n",
-      "computer wins!\n"
-     ]
-    }
-   ],
-   "source": [
-    "p1 = Player(\"computer\", exp_rate=0)\n",
-    "p1.loadPolicy(\"policy_p1\")\n",
-    "\n",
-    "p2 = HumanPlayer(\"human\")\n",
-    "\n",
-    "st = State(p1, p2)\n",
-    "st.play2()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 56,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-------------\n",
-      "|   |   |   | \n",
+      "| 4 | o | 6 | \n",
       "-------------\n",
-      "|   |   | x | \n",
+      "| 7 | o | x | \n",
       "-------------\n",
-      "|   |   |   | \n",
       "-------------\n",
-      "Input your action row:2\n",
-      "Input your action col:2\n",
-      "-------------\n",
-      "|   |   |   | \n",
-      "-------------\n",
-      "|   |   | x | \n",
-      "-------------\n",
-      "|   |   | o | \n",
-      "-------------\n",
-      "-------------\n",
-      "|   |   |   | \n",
-      "-------------\n",
-      "|   | x | x | \n",
-      "-------------\n",
-      "|   |   | o | \n",
-      "-------------\n",
-      "Input your action row:1\n",
-      "Input your action col:0\n",
-      "-------------\n",
-      "|   |   |   | \n",
-      "-------------\n",
-      "| o | x | x | \n",
-      "-------------\n",
-      "|   |   | o | \n",
-      "-------------\n",
-      "-------------\n",
-      "|   |   |   | \n",
-      "-------------\n",
-      "| o | x | x | \n",
-      "-------------\n",
-      "| x |   | o | \n",
-      "-------------\n",
-      "Input your action row:0\n",
-      "Input your action col:0\n",
-      "-------------\n",
-      "| o |   |   | \n",
+      "| x | x | o | \n",
       "-------------\n",
-      "| o | x | x | \n",
+      "| x | o | 6 | \n",
       "-------------\n",
-      "| x |   | o | \n",
+      "| 7 | o | x | \n",
       "-------------\n",
       "-------------\n",
-      "| o |   | x | \n",
+      "| x | x | o | \n",
       "-------------\n",
-      "| o | x | x | \n",
+      "| x | o | 6 | \n",
       "-------------\n",
-      "| x |   | o | \n",
+      "| o | o | x | \n",
       "-------------\n",
-      "computer wins!\n"
+      "human wins!\n"
      ]
     }
    ],
@@ -604,7 +477,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.13.5"
   }
  },
  "nbformat": 4,
diff --git a/TicTacToe/ticTacToe.py b/TicTacToe/ticTacToe.py
index 5e18039..85ec08c 100644
--- a/TicTacToe/ticTacToe.py
+++ b/TicTacToe/ticTacToe.py
@@ -3,7 +3,7 @@
 
 BOARD_ROWS = 3
 BOARD_COLS = 3
-
+train = False
 
 class State:
     def __init__(self, p1, p2):
@@ -39,14 +39,22 @@ def winner(self):
                 return -1
         # diagonal
         diag_sum1 = sum([self.board[i, i] for i in range(BOARD_COLS)])
-        diag_sum2 = sum([self.board[i, BOARD_COLS - i - 1] for i in range(BOARD_COLS)])
-        diag_sum = max(abs(diag_sum1), abs(diag_sum2))
-        if diag_sum == 3:
+        diag_sum2 = sum([self.board[i, BOARD_COLS-i-1] for i in range(BOARD_COLS)])
+        if diag_sum1 == 3 or diag_sum2 == 3:
             self.isEnd = True
-            if diag_sum1 == 3 or diag_sum2 == 3:
-                return 1
-            else:
-                return -1
+            return 1
+        if diag_sum1 == -3 or diag_sum2 == -3:
+            self.isEnd = True
+            return -1
+        
+        # tie
+        # no available positions
+        if len(self.availablePositions()) == 0:
+            self.isEnd = True
+            return 0
+        # not end
+        self.isEnd = False
+        return None
 
         # tie
         # no available positions
@@ -179,10 +187,13 @@ def showBoard(self):
                 if self.board[i, j] == -1:
                     token = 'o'
                 if self.board[i, j] == 0:
-                    token = ' '
+                    token = str(j+1 + i*3)
+
+
+
                 out += token + ' | '
             print(out)
-        print('-------------')
+        print('-------------')    
 
 
 class Player:
@@ -245,40 +256,57 @@ def loadPolicy(self, file):
 
 class HumanPlayer:
     def __init__(self, name):
-        self.name = name
+        self.name = name 
+    
+    def cell_to_row_col(self, cell_number):
+        """Map a 1-based, row-major cell number (1-9) to a 0-based (row, col)."""
+        if not 1 <= cell_number <= 9:
+            raise ValueError("Cell number must be between 1 and 9")
+        # divmod on the 0-based index yields the row and column in one step.
+        row_idx, col_idx = divmod(cell_number - 1, 3)
+        return row_idx, col_idx
 
+    
     def chooseAction(self, positions):
         while True:
-            row = int(input("Input your action row:"))
-            col = int(input("Input your action col:"))
+            cell = int(input("Input your action:"))
+            row,col = self.cell_to_row_col(cell)
             action = (row, col)
             if action in positions:
                 return action
+            
 
+
+    
     # append a hash state
     def addState(self, state):
         pass
-
+    
     # at the end of game, backpropagate and update states value
     def feedReward(self, reward):
         pass
-
+            
     def reset(self):
         pass
 
 
 if __name__ == "__main__":
-    # training
+    
     p1 = Player("p1")
     p2 = Player("p2")
-
     st = State(p1, p2)
-    print("training...")
-    st.play(50000)
+
+    # To re-train the agents, set the module-level `train` flag (top of this file) to True.
+    # Training takes approximately 3 minutes.
+    if train:
+        print("training...")
+        st.play(50000)  # same number of rounds as the notebook's training cell
+        p1.savePolicy()
+        p2.savePolicy()
 
     # play with human
     p1 = Player("computer", exp_rate=0)
-    p1.loadPolicy("policy_p1")
+    p1.loadPolicy("TicTacToe/policy_p1")
 
     p2 = HumanPlayer("human")