Kaggle · hannw · Jul 15, 2025 · Jul 15, 2025 · Jul 15, 2025 · Jul 15, 2025
diff --git a/assets/logo/chatgpt.png b/assets/logo/chatgpt.png
diff --git a/assets/logo/claude.png b/assets/logo/claude.png
diff --git a/assets/logo/deepseek.png b/assets/logo/deepseek.png
diff --git a/assets/logo/gemini.png b/assets/logo/gemini.png
diff --git a/assets/logo/grok.png b/assets/logo/grok.png
diff --git a/assets/logo/kimi.png b/assets/logo/kimi.png
diff --git a/assets/logo/qwen.png b/assets/logo/qwen.png
diff --git a/assets/moon4.png b/assets/moon4.png
diff --git a/assets/stickman.fbx b/assets/stickman.fbx
diff --git a/assets/stickman.glb b/assets/stickman.glb
diff --git a/kaggle_environments/__init__.py b/kaggle_environments/__init__.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 from importlib import import_module
 from os import listdir
 from .agent import Agent
@@ -26,6 +27,9 @@
            "make", "register", "utils", "__version__",
            "get_episode_replay", "list_episodes", "list_episodes_for_team", "list_episodes_for_submission"]
 
+_script_dir = os.path.dirname(os.path.realpath(__file__))  # absolute directory containing this module
+PROJECT_ROOT = os.path.abspath(os.path.join('..', _script_dir))  # NOTE(review): os.path.join drops '..' because _script_dir is absolute, so this equals _script_dir rather than its parent -- confirm intent
+
 # Register Environments.
 
 for name in listdir(utils.envs_path):

diff --git a/kaggle_environments/agent.py b/kaggle_environments/agent.py
@@ -105,6 +105,11 @@ def build_agent(raw, builtin_agents, environment_name):
     Returns the agent and whether the agent is parallelizable.
     """
     if raw in builtin_agents:
+        agent = builtin_agents[raw]
+        # TODO: Below is a hack. Assuming an agent is a global callable is not enough to guarantee it is stateless.
+        #  Kaggle environment should allow more scalable agent initialization and proper agent interface design.
+        if hasattr(agent, "reset"):
+            agent.reset()
         return builtin_agents[raw], False
 
     # Already callable.
@@ -163,16 +168,23 @@ def act(self, observation):
 
         # Start the timer.
 
-        with StringIO() as out_buffer, StringIO() as err_buffer, redirect_stdout(out_buffer), redirect_stderr(err_buffer):
-            try:
-                start = perf_counter()
-                action = self.agent(*args)
-            except Exception as e:
-                traceback.print_exc(file=err_buffer)
-                action = e
-
-            out = out_buffer.getvalue()
-            err = err_buffer.getvalue()
+        if self.debug:
+            # Adding a debugging branch here, since the context manager and try except would prevent
+            # debugger from functioning properly.
+            start = perf_counter()
+            action = self.agent(*args)
+            out = ""
+            err = ""
+        else:
+            with StringIO() as out_buffer, StringIO() as err_buffer, redirect_stdout(out_buffer), redirect_stderr(err_buffer):
+                try:
+                    start = perf_counter()
+                    action = self.agent(*args)
+                except Exception as e:
+                    traceback.print_exc(file=err_buffer)
+                    action = e
+                out = out_buffer.getvalue()
+                err = err_buffer.getvalue()
             # Get the maximum log length
             # Allow up to 10k (default) log characters per step which is ~10MB per 600 step episode
             max_log_length = self.configuration.get('maxLogLength', 10000)

diff --git a/kaggle_environments/core.py b/kaggle_environments/core.py
@@ -572,31 +572,34 @@ def update_props(props):
             )
         return data
 
-    def __run_interpreter(self, state, logs):
+    def __loop_through_interpreter(self, state, logs):  # One interpreter pass plus per-agent post-processing; returns the new state.
+        args = [structify(state), self, logs]
+        new_state = structify(self.interpreter(
+            *args[:self.interpreter.__code__.co_argcount]))  # pass only as many positional args as the interpreter declares
+        new_state[0].observation.step = (
+            0 if self.done
+            else len(self.steps)
+        )
+
+        for index, agent in enumerate(new_state):
+            if index < len(logs) and "duration" in logs[index]:
+                duration = logs[index]["duration"]
+                overage_time_consumed = max(0, duration - self.configuration.actTimeout)  # only time beyond actTimeout is charged
+                agent.observation.remainingOverageTime -= overage_time_consumed
+            if agent.status not in self.__state_schema.properties.status.enum:
+                self.debug_print(f"Invalid Action: {agent.status}")
+                agent.status = "INVALID"
+            if agent.status in ["ERROR", "INVALID", "TIMEOUT"]:
+                agent.reward = None  # reward is cleared for agents in a failure status
+        return new_state
+
+    def __run_interpreter_prod(self, state, logs):
         out = None
         err = None
-        # Append any environmental logs to any agent logs we collected.
         try:
             with StringIO() as out_buffer, StringIO() as err_buffer, redirect_stdout(out_buffer), redirect_stderr(err_buffer):
                 try:
-                    args = [structify(state), self, logs]
-                    new_state = structify(self.interpreter(
-                        *args[:self.interpreter.__code__.co_argcount]))
-                    new_state[0].observation.step = (
-                        0 if self.done
-                        else len(self.steps)
-                    )
-
-                    for index, agent in enumerate(new_state):
-                        if index < len(logs) and "duration" in logs[index]:
-                            duration = logs[index]["duration"]
-                            overage_time_consumed = max(0, duration - self.configuration.actTimeout)
-                            agent.observation.remainingOverageTime -= overage_time_consumed
-                        if agent.status not in self.__state_schema.properties.status.enum:
-                            self.debug_print(f"Invalid Action: {agent.status}")
-                            agent.status = "INVALID"
-                        if agent.status in ["ERROR", "INVALID", "TIMEOUT"]:
-                            agent.reward = None
+                    new_state = self.__loop_through_interpreter(state, logs)
                     return new_state
                 except Exception as e:
                     # Print the exception stack trace to our log
@@ -629,6 +632,13 @@ def __run_interpreter(self, state, logs):
                     err = err[:-1]
                 self.debug_print(err)
 
+    def __run_interpreter(self, state, logs):
+        # Debug mode calls the interpreter directly; otherwise use the prod wrapper, which redirects stdout/stderr and catches exceptions.
+        if self.debug:
+            return self.__loop_through_interpreter(state, logs)
+        else:
+            return self.__run_interpreter_prod(state, logs)
+
     def __process_specification(self, spec):
         if has(spec, path=["reward"]):
             reward = spec["reward"]

diff --git a/kaggle_environments/envs/connectx/test_connectx.py b/kaggle_environments/envs/connectx/test_connectx.py
@@ -21,7 +21,7 @@ def before_each(state=None, configuration=None):
     global env
     steps = [] if state == None else [state]
     env = make("connectx", steps=steps,
-               configuration=configuration, debug=True)
+               configuration=configuration, debug=False)
 
 
 def test_has_correct_timeouts():

diff --git a/kaggle_environments/envs/llm_20_questions/test_llm_20_questions.py b/kaggle_environments/envs/llm_20_questions/test_llm_20_questions.py
@@ -25,14 +25,14 @@ def error_agent():
     raise ValueError
 
 def test_llm_20_q_completes():
-    env = make("llm_20_questions", debug=True)
+    env = make("llm_20_questions", debug=False)
     env.run([custom_questioner, custom_answerer, custom_questioner, custom_answerer])
     json = env.toJSON()
     assert json["name"] == "llm_20_questions"
     assert json["statuses"] == ["DONE", "DONE", "DONE", "DONE"]
 
 def test_llm_20_q_errors_on_bad_answer():
-    env = make("llm_20_questions", debug=True)
+    env = make("llm_20_questions", debug=False)
     env.run([custom_questioner, custom_answerer, custom_questioner, bad_answerer])
     json = env.toJSON()
     assert json["name"] == "llm_20_questions"
@@ -42,7 +42,7 @@ def test_llm_20_q_errors_on_bad_answer():
     assert len(json["steps"]) == 3
 
 def test_llm_20_q_errors_on_error_answer():
-    env = make("llm_20_questions", debug=True)
+    env = make("llm_20_questions", debug=False)
     env.run([custom_questioner, custom_answerer, custom_questioner, error_agent])
     json = env.toJSON()
     assert json["name"] == "llm_20_questions"
@@ -51,7 +51,7 @@ def test_llm_20_q_errors_on_error_answer():
     assert len(json["steps"]) == 3
 
 def test_llm_20_q_errors_on_error_question():
-    env = make("llm_20_questions", debug=True)
+    env = make("llm_20_questions", debug=False)
     env.run([custom_questioner, custom_answerer, error_agent, custom_answerer])
     json = env.toJSON()
     assert json["name"] == "llm_20_questions"
@@ -60,7 +60,7 @@ def test_llm_20_q_errors_on_error_question():
     assert len(json["steps"]) == 2
 
 def test_llm_20_q_errors_on_error_last_guess():
-    env = make("llm_20_questions", debug=True)
+    env = make("llm_20_questions", debug=False)
     env.run([custom_questioner, custom_answerer, last_round_guesser_error, custom_answerer])
     json = env.toJSON()
     assert json["name"] == "llm_20_questions"

diff --git a/kaggle_environments/envs/tictactoe/test_tictactoe.py b/kaggle_environments/envs/tictactoe/test_tictactoe.py
@@ -51,7 +51,7 @@ def custom6(obs):
 def before_each(state=None):
     global env
     steps = [] if state == None else [state]
-    env = make("tictactoe", steps=steps, debug=True)
+    env = make("tictactoe", steps=steps, debug=False)
 
 
 def test_to_json():
@@ -201,22 +201,22 @@ def test_can_run_custom_agents():
 
 
 def test_agents_can_timeout_on_init():
-    env = make("tictactoe", debug=True)
+    env = make("tictactoe", debug=False)
     state = env.run([custom1, custom3])[-1]
     assert state[1]["status"] == "TIMEOUT"
     assert state[1]["observation"]["remainingOverageTime"] < 0
 
 
 def test_agents_can_timeout_on_act():
-    env = make("tictactoe", debug=True)
+    env = make("tictactoe", debug=False)
     state = env.run([custom1, custom6])[-1]
     print(state)
     assert state[1]["status"] == "TIMEOUT"
     assert state[1]["observation"]["remainingOverageTime"] < 0
 
 
 def test_run_timeout():
-    env = make("tictactoe", debug=True, configuration={"actTimeout": 10, "runTimeout": 1})
+    env = make("tictactoe", debug=False, configuration={"actTimeout": 10, "runTimeout": 1})
     try:
         state = env.run([custom1, custom3])[-1]
     except DeadlineExceeded:

diff --git a/kaggle_environments/envs/werewolf/GAME_RULE.md b/kaggle_environments/envs/werewolf/GAME_RULE.md
@@ -0,0 +1,75 @@
+# Werewolf: Game Rules
+
+Welcome to Werewolf, a game of social deduction, team collaboration, deception, and survival. Players are secretly assigned roles on one of two teams: the Village or the Werewolves.
+
+## Roles
+
+Each player is assigned one of the following roles:
+
+### Village Team
+
+The goal of the Village team is to exile all the werewolves.
+
+* **Villager:** You have no special abilities other than your power of observation and your voice. Use the discussion phase to identify suspicious behavior and vote to exile suspected werewolves.
+* **Seer:** Each night, you may choose one player to investigate. You will learn if that player is a Werewolf or not. Your goal is to share this information strategically to help the village without revealing your identity too soon.
+* **Doctor:** Each night, you may choose one player to protect. The player you protect cannot be eliminated by the werewolves that night.
+
+### Werewolf Team
+
+* **Werewolf:** Your goal is to eliminate villagers until the number of werewolves equals the number of remaining village members. Each night, you and your fellow werewolves will secretly agree on one player to eliminate.
+
+## Game Phases
+
+The game alternates between a Night phase and a Day phase.
+
+### Night Phase 🐺
+
+During the night, all players close their eyes. The moderator will ask players with special roles to wake up and perform their actions in this order:
+
+1.  **Doctor:** Chooses one player to protect.
+2.  **Seer:** Chooses one player to investigate their alignment.
+3.  **Werewolves:** Silently vote on one player to eliminate.
+
+### Day Phase ☀️
+
+1.  **Announcement:** The moderator announces which player, if any, was eliminated during the night. That player is removed from the game and may not speak or participate further.
+2.  **Discussion:** The surviving players discuss who they think the werewolves are.
+3.  **Exile Vote:** Players vote on who to exile from the village. The player who receives the most votes is exiled, removed from the game, and their role is revealed.
+
+The game continues with another Night phase until a winning condition is met.
+
+## Customizable Rules
+
+Before the game begins, the following options must be decided.
+
+### 1. Doctor's Self-Save
+
+* **Option A (Self-Save Allowed):** The Doctor is allowed to choose themselves as the target of their protection.
+* **Option B (No Self-Save):** The Doctor must choose another player to protect.
+
+### 2. Discussion Protocol
+
+* **Option A (Parallel Discussion):** All players may speak simultaneously for a number of rounds.
+* **Option B (Round Robin):** Players speak one after another, following a predefined order, for a number of rounds.
+
+### 3. Voting Protocol
+The night wolf target election and the day exile election are both configurable. All voting protocols follow a random
+tie-breaking mechanism, where a random draw is used when there are multiple candidates with the same number of votes.
+
+* **Option A (Sequential Voting):** Voters cast their votes one after another, and each voter can see all earlier votes.
+* **Option B (Parallel Voting):** All voters cast their votes simultaneously.
+
+## Winning the Game
+
+A team wins as soon as their winning condition is met.
+
+* **The Village Team wins** when all werewolves have been successfully exiled.
+* **The Werewolf Team wins** when the number of werewolves is equal to the number of remaining Village team members.
+
+### Rewards
+
+All members of the winning team will receive a reward of **1**. This includes players who were eliminated before the end of the game.
+
+### Tie Game (Forfeit)
+
+If any back-end inference fails during the game, the match will immediately end. The game will be declared a **tie**, and no players will receive a reward.