SY3141 · SY3141 · Mar 31, 2026 · Mar 30, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/catanatron/catanatron/apply_action.py b/catanatron/catanatron/apply_action.py
@@ -94,7 +94,7 @@ def apply_action(
     elif action.action_type == ActionType.ROLL:
         action_record = apply_roll(state, action, action_record)
     elif action.action_type == ActionType.DISCARD_RESOURCE:
-        action_record = apply_discard(state, action, action_record)
+        action_record = apply_discard(state, action)
     elif action.action_type == ActionType.MOVE_ROBBER:
         action_record = apply_move_robber(state, action, action_record)
     elif action.action_type == ActionType.PLAY_KNIGHT_CARD:
@@ -266,26 +266,25 @@ def apply_roll(state: State, action: Action, action_record=None):
     action = Action(action.color, action.action_type, dices)
 
     if number == 7:
-        state.discard_counts = {
-            color: (
-                player_num_resource_cards(state, color) // 2
-                if player_num_resource_cards(state, color) > state.discard_limit
-                else 0
-            )
-            for color in state.colors
-        }
-        should_enter_discarding_sequence = any(state.discard_counts.values())
+        discard_counts = []
+        first_discarding_player_index = None
 
-        if should_enter_discarding_sequence:
-            state.current_player_index = next(
-                i
-                for i, color in enumerate(state.colors)
-                if state.discard_counts[color] > 0
-            )
+        for i, color in enumerate(state.colors):
+            num_cards = player_num_resource_cards(state, color)
+            discard_count = num_cards // 2 if num_cards > state.discard_limit else 0
+            discard_counts.append(discard_count)
+
+            if discard_count > 0 and first_discarding_player_index is None:
+                first_discarding_player_index = i
+
+        state.discard_counts = discard_counts
+
+        if first_discarding_player_index is not None:
+            state.current_player_index = first_discarding_player_index
             state.current_prompt = ActionPrompt.DISCARD
             state.is_discarding = True
         else:
-            state.discard_counts = {color: 0 for color in state.colors}
+            state.discard_counts = [0] * len(state.colors)
             # state.current_player_index stays the same
             state.current_prompt = ActionPrompt.MOVE_ROBBER
             state.is_moving_knight = True
@@ -304,55 +303,42 @@ def apply_roll(state: State, action: Action, action_record=None):
     return ActionRecord(action=action, result=dices)
 
 
-def normalize_discarded_cards(state: State, action: Action, action_record=None):
+def apply_discard(state: State, action: Action):
     discarded = action.value
-    if discarded is None and action_record is not None:
-        discarded = action_record.result
-
-    if discarded is None:
-        raise ValueError("Discard action requires a resource")
-
-    if isinstance(discarded, (list, tuple)):
-        if len(discarded) != 1:
-            raise ValueError("Discard action must specify exactly 1 resource")
-        return discarded[0]
-
-    return discarded
-
-
-def apply_discard(state: State, action: Action, action_record=None):
-    discarded = normalize_discarded_cards(state, action, action_record)
-    remaining = state.discard_counts[action.color]
-    if remaining <= 0:
-        raise ValueError("Trying to discard more cards than required")
+    player_index = state.color_to_index[action.color]
+    remaining = state.discard_counts[player_index]
+    assert remaining > 0, "Trying to discard when not required"
 
     to_discard = freqdeck_from_listdeck([discarded])
-
     player_freqdeck_subtract(state, action.color, to_discard)
     state.resource_freqdeck = freqdeck_add(state.resource_freqdeck, to_discard)
-    state.discard_counts[action.color] = remaining - 1
+    state.discard_counts[player_index] -= 1
     action = Action(action.color, action.action_type, discarded)
 
-    if state.discard_counts[action.color] > 0:
+    if state.discard_counts[player_index] > 0:
         # state.current_player_index stays the same
         # state.current_prompt stays the same
         pass
     else:
-        discarders_left = [state.discard_counts[color] > 0 for color in state.colors][
-            state.current_player_index + 1 :
-        ]
-        if any(discarders_left):
-            to_skip = discarders_left.index(True)
-            state.current_player_index = state.current_player_index + 1 + to_skip
+        next_discarder_index = next(
+            (
+                i
+                for i in range(state.current_player_index + 1, len(state.colors))
+                if state.discard_counts[i] > 0
+            ),
+            None,
+        )
+        if next_discarder_index is not None:
+            state.current_player_index = next_discarder_index
             # state.current_prompt stays the same
         else:
             state.current_player_index = state.current_turn_index
             state.current_prompt = ActionPrompt.MOVE_ROBBER
             state.is_discarding = False
             state.is_moving_knight = True
-            state.discard_counts = {color: 0 for color in state.colors}
+            state.discard_counts = [0] * len(state.colors)
 
-    return ActionRecord(action=action, result=[discarded])
+    return ActionRecord(action=action, result=discarded)
 
 
 def apply_move_robber(state: State, action: Action, action_record=None):

diff --git a/catanatron/catanatron/gym/envs/action_space.py b/catanatron/catanatron/gym/envs/action_space.py
@@ -15,6 +15,8 @@ def get_action_array(
     catan_map = build_map(map_type)
     num_nodes = len(catan_map.land_nodes)
 
+    # We sort the actions to ensure a consistent ordering and reproducibility
+    # without sorting, we couldn't get gym usages to be reproducible
     actions_array = sorted(
         [
             (ActionType.ROLL, None),
@@ -67,9 +69,7 @@ def get_action_array(
             ],
             (ActionType.END_TURN, None),
         ],
-        # Preserve historical DISCARD ordering so the rename does not reshuffle
-        # integer action ids for gym consumers.
-        key=lambda action: str(action).replace("DISCARD_RESOURCE", "DISCARD"),
+        key=lambda action: str(action),
     )
     return actions_array
 

diff --git a/catanatron/catanatron/json.py b/catanatron/catanatron/json.py
@@ -30,8 +30,6 @@ def action_from_json(data) -> Action:
         if len(resources) not in [1, 2]:
             raise ValueError("Year of Plenty action must have 1 or 2 resources")
         action = Action(color, action_type, resources)
-    elif action_type == ActionType.DISCARD_RESOURCE:
-        action = Action(color, action_type, data[2])
     elif action_type == ActionType.MOVE_ROBBER:
         coordinate, victim = data[2]
         coordinate = tuple(coordinate)
@@ -99,6 +97,9 @@ def default(self, obj):
                 "robber_coordinate": obj.state.board.robber_coordinate,
                 "current_color": obj.state.current_color(),
                 "current_prompt": obj.state.current_prompt,
+                "current_discard_count": obj.state.discard_counts[
+                    obj.state.current_player_index
+                ],
                 "current_playable_actions": obj.playable_actions,
                 "longest_roads_by_player": longest_roads_by_player(obj.state),
                 "winning_color": obj.winning_color(),

diff --git a/catanatron/catanatron/models/actions.py b/catanatron/catanatron/models/actions.py
@@ -279,7 +279,7 @@ def initial_road_possibilities(state, color) -> List[Action]:
 
 
 def discard_possibilities(state: State, color) -> List[Action]:
-    if state.discard_counts[color] <= 0:
+    if state.discard_counts[state.color_to_index[color]] <= 0:
         return []
 
     return [

diff --git a/catanatron/catanatron/models/enums.py b/catanatron/catanatron/models/enums.py
@@ -131,7 +131,7 @@ def __repr__(self):
 
 The "result" field is polymorphic depending on the action_type.
 - ROLL: result is (int, int) 2 dice rolled
-- DISCARD_RESOURCE: result is List[Resource] discarded in this action
+- DISCARD_RESOURCE: result is Resource discarded in this action
 - MOVE_ROBBER: result is card stolen (Resource|None)
 - BUY_DEVELOPMENT_CARD: result is card
 - ...for the rest, result is None since they are deterministic actions

diff --git a/catanatron/catanatron/state.py b/catanatron/catanatron/state.py
@@ -76,7 +76,7 @@ class State:
         current_prompt (ActionPrompt): DEPRECATED. Not needed; use is_initial_build_phase,
             is_moving_knight, etc... instead.
         is_discarding (bool): If current player needs to discard.
-        discard_counts (Dict[Color, int]): Remaining number of cards each player
+        discard_counts (List[int]): Color-index aligned number of cards each player
             must discard in the current discard sequence.
         is_moving_knight (bool): If current player needs to move robber.
         is_road_building (bool): If current player needs to build free roads per Road
@@ -133,7 +133,7 @@ def __init__(
             self.current_prompt = ActionPrompt.BUILD_INITIAL_SETTLEMENT
             self.is_initial_build_phase = True
             self.is_discarding = False
-            self.discard_counts: Dict[Color, int] = {color: 0 for color in self.colors}
+            self.discard_counts: List[int] = [0] * len(self.colors)
             self.is_moving_knight = False
             self.is_road_building = False
             self.free_roads_available = 0

diff --git a/tests/models/test_actions.py b/tests/models/test_actions.py
@@ -60,7 +60,7 @@ def test_monopoly_possible_actions():
 def test_discard_possibilities_are_per_resource():
     player = SimplePlayer(Color.RED)
     state = State([player])
-    state.discard_counts[player.color] = 2
+    state.discard_counts[0] = 2
 
     player_deck_replenish(state, player.color, WHEAT, 2)
     player_deck_replenish(state, player.color, BRICK, 1)
@@ -84,7 +84,7 @@ def test_discard_possibilities_empty_when_player_does_not_need_to_discard():
 def test_discard_possibilities_do_not_repeat_same_resource():
     player = SimplePlayer(Color.RED)
     state = State([player])
-    state.discard_counts[player.color] = 3
+    state.discard_counts[0] = 3
 
     player_deck_replenish(state, player.color, WHEAT, 3)
 
@@ -96,7 +96,7 @@ def test_discard_possibilities_do_not_repeat_same_resource():
 def test_discard_possibilities_include_each_resource_in_resource_order():
     player = SimplePlayer(Color.RED)
     state = State([player])
-    state.discard_counts[player.color] = len(RESOURCES)
+    state.discard_counts[0] = len(RESOURCES)
 
     for resource in RESOURCES:
         player_deck_replenish(state, player.color, resource)

diff --git a/tests/test_json.py b/tests/test_json.py
@@ -60,12 +60,6 @@ def test_action_from_json_discard():
     assert action.value == WOOD
 
 
-def test_action_from_json_discard_rejects_list_value():
-    data = ["BLUE", "DISCARD_RESOURCE", [WOOD]]
-    with pytest.raises(ValueError, match="Discard action must have 1 resource"):
-        action_from_json(data)
-
-
 def test_action_from_json_play_year_of_plenty_invalid():
     data = ["WHITE", "PLAY_YEAR_OF_PLENTY", [WOOD, BRICK, SHEEP]]
     with pytest.raises(

diff --git a/tests/test_state.py b/tests/test_state.py
@@ -221,18 +221,50 @@ def test_discard_sequence_tracks_original_hand_size():
     state.current_player_index = current_player_index
     state.current_prompt = ActionPrompt.DISCARD
     state.is_discarding = True
-    state.discard_counts[color] = 4
+    state.discard_counts[current_player_index] = 4
     player_deck_replenish(state, color, BRICK, 1)
     player_deck_replenish(state, color, WHEAT, 3)
 
     apply_action(state, Action(color, ActionType.DISCARD_RESOURCE, BRICK))
     assert state.current_color() == color
     assert state.is_discarding
-    assert state.discard_counts[color] == 3
+    assert state.discard_counts[current_player_index] == 3
 
     apply_action(state, Action(color, ActionType.DISCARD_RESOURCE, WHEAT))
     apply_action(state, Action(color, ActionType.DISCARD_RESOURCE, WHEAT))
     apply_action(state, Action(color, ActionType.DISCARD_RESOURCE, WHEAT))
     assert not state.is_discarding
     assert state.is_moving_knight
     assert state.current_player_index == turn_player_index
+
+
+def test_discard_sequence_advances_to_next_discarder():
+    players = [
+        SimplePlayer(Color.RED),
+        SimplePlayer(Color.BLUE),
+        SimplePlayer(Color.WHITE),
+    ]
+    state = State(players)
+    current_color = players[0].color
+    next_color = players[2].color
+    current_player_index = state.colors.index(current_color)
+    next_player_index = state.colors.index(next_color)
+
+    state.current_turn_index = 1
+    state.current_player_index = current_player_index
+    state.current_prompt = ActionPrompt.DISCARD
+    state.is_discarding = True
+    state.discard_counts[current_player_index] = 1
+    state.discard_counts[next_player_index] = 2
+
+    player_deck_replenish(state, current_color, BRICK, 1)
+
+    apply_action(
+        state,
+        Action(current_color, ActionType.DISCARD_RESOURCE, BRICK),
+    )
+
+    assert state.current_player_index == next_player_index
+    assert state.current_color() == next_color
+    assert state.is_discarding
+    assert state.current_prompt == ActionPrompt.DISCARD