Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 34 additions & 48 deletions catanatron/catanatron/apply_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def apply_action(
elif action.action_type == ActionType.ROLL:
action_record = apply_roll(state, action, action_record)
elif action.action_type == ActionType.DISCARD_RESOURCE:
action_record = apply_discard(state, action, action_record)
action_record = apply_discard(state, action)
elif action.action_type == ActionType.MOVE_ROBBER:
action_record = apply_move_robber(state, action, action_record)
elif action.action_type == ActionType.PLAY_KNIGHT_CARD:
Expand Down Expand Up @@ -266,26 +266,25 @@ def apply_roll(state: State, action: Action, action_record=None):
action = Action(action.color, action.action_type, dices)

if number == 7:
state.discard_counts = {
color: (
player_num_resource_cards(state, color) // 2
if player_num_resource_cards(state, color) > state.discard_limit
else 0
)
for color in state.colors
}
should_enter_discarding_sequence = any(state.discard_counts.values())
discard_counts = []
first_discarding_player_index = None

if should_enter_discarding_sequence:
state.current_player_index = next(
i
for i, color in enumerate(state.colors)
if state.discard_counts[color] > 0
)
for i, color in enumerate(state.colors):
num_cards = player_num_resource_cards(state, color)
discard_count = num_cards // 2 if num_cards > state.discard_limit else 0
discard_counts.append(discard_count)

if discard_count > 0 and first_discarding_player_index is None:
first_discarding_player_index = i

state.discard_counts = discard_counts

if first_discarding_player_index is not None:
state.current_player_index = first_discarding_player_index
state.current_prompt = ActionPrompt.DISCARD
state.is_discarding = True
else:
state.discard_counts = {color: 0 for color in state.colors}
state.discard_counts = [0] * len(state.colors)
# state.current_player_index stays the same
state.current_prompt = ActionPrompt.MOVE_ROBBER
state.is_moving_knight = True
Expand All @@ -304,55 +303,42 @@ def apply_roll(state: State, action: Action, action_record=None):
return ActionRecord(action=action, result=dices)


def normalize_discarded_cards(state: State, action: Action, action_record=None):
def apply_discard(state: State, action: Action):
discarded = action.value
if discarded is None and action_record is not None:
discarded = action_record.result

if discarded is None:
raise ValueError("Discard action requires a resource")

if isinstance(discarded, (list, tuple)):
if len(discarded) != 1:
raise ValueError("Discard action must specify exactly 1 resource")
return discarded[0]

return discarded


def apply_discard(state: State, action: Action, action_record=None):
discarded = normalize_discarded_cards(state, action, action_record)
remaining = state.discard_counts[action.color]
if remaining <= 0:
raise ValueError("Trying to discard more cards than required")
player_index = state.color_to_index[action.color]
remaining = state.discard_counts[player_index]
assert remaining > 0, "Trying to discard when not required"

to_discard = freqdeck_from_listdeck([discarded])

player_freqdeck_subtract(state, action.color, to_discard)
state.resource_freqdeck = freqdeck_add(state.resource_freqdeck, to_discard)
state.discard_counts[action.color] = remaining - 1
state.discard_counts[player_index] -= 1
action = Action(action.color, action.action_type, discarded)

if state.discard_counts[action.color] > 0:
if state.discard_counts[player_index] > 0:
# state.current_player_index stays the same
# state.current_prompt stays the same
pass
else:
discarders_left = [state.discard_counts[color] > 0 for color in state.colors][
state.current_player_index + 1 :
]
if any(discarders_left):
to_skip = discarders_left.index(True)
state.current_player_index = state.current_player_index + 1 + to_skip
next_discarder_index = next(
(
i
for i in range(state.current_player_index + 1, len(state.colors))
if state.discard_counts[i] > 0
),
None,
)
if next_discarder_index is not None:
state.current_player_index = next_discarder_index
# state.current_prompt stays the same
else:
state.current_player_index = state.current_turn_index
state.current_prompt = ActionPrompt.MOVE_ROBBER
state.is_discarding = False
state.is_moving_knight = True
state.discard_counts = {color: 0 for color in state.colors}
state.discard_counts = [0] * len(state.colors)

return ActionRecord(action=action, result=[discarded])
return ActionRecord(action=action, result=discarded)


def apply_move_robber(state: State, action: Action, action_record=None):
Expand Down
6 changes: 3 additions & 3 deletions catanatron/catanatron/gym/envs/action_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ def get_action_array(
catan_map = build_map(map_type)
num_nodes = len(catan_map.land_nodes)

# We sort the actions to ensure a consistent ordering and reproducibility
# without sorting, we couldn't get gym usages to be reproducible
actions_array = sorted(
[
(ActionType.ROLL, None),
Expand Down Expand Up @@ -67,9 +69,7 @@ def get_action_array(
],
(ActionType.END_TURN, None),
],
# Preserve historical DISCARD ordering so the rename does not reshuffle
# integer action ids for gym consumers.
key=lambda action: str(action).replace("DISCARD_RESOURCE", "DISCARD"),
key=lambda action: str(action),
)
return actions_array

Expand Down
5 changes: 3 additions & 2 deletions catanatron/catanatron/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ def action_from_json(data) -> Action:
if len(resources) not in [1, 2]:
raise ValueError("Year of Plenty action must have 1 or 2 resources")
action = Action(color, action_type, resources)
elif action_type == ActionType.DISCARD_RESOURCE:
action = Action(color, action_type, data[2])
elif action_type == ActionType.MOVE_ROBBER:
coordinate, victim = data[2]
coordinate = tuple(coordinate)
Expand Down Expand Up @@ -99,6 +97,9 @@ def default(self, obj):
"robber_coordinate": obj.state.board.robber_coordinate,
"current_color": obj.state.current_color(),
"current_prompt": obj.state.current_prompt,
"current_discard_count": obj.state.discard_counts[
obj.state.current_player_index
],
"current_playable_actions": obj.playable_actions,
"longest_roads_by_player": longest_roads_by_player(obj.state),
"winning_color": obj.winning_color(),
Expand Down
2 changes: 1 addition & 1 deletion catanatron/catanatron/models/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def initial_road_possibilities(state, color) -> List[Action]:


def discard_possibilities(state: State, color) -> List[Action]:
if state.discard_counts[color] <= 0:
if state.discard_counts[state.color_to_index[color]] <= 0:
return []

return [
Expand Down
2 changes: 1 addition & 1 deletion catanatron/catanatron/models/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def __repr__(self):

The "result" field is polymorphic depending on the action_type.
- ROLL: result is (int, int) 2 dice rolled
- DISCARD_RESOURCE: result is List[Resource] discarded in this action
- DISCARD_RESOURCE: result is Resource discarded in this action
- MOVE_ROBBER: result is card stolen (Resource|None)
- BUY_DEVELOPMENT_CARD: result is card
- ...for the rest, result is None since they are deterministic actions
Expand Down
4 changes: 2 additions & 2 deletions catanatron/catanatron/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class State:
current_prompt (ActionPrompt): DEPRECATED. Not needed; use is_initial_build_phase,
is_moving_knight, etc... instead.
is_discarding (bool): If current player needs to discard.
discard_counts (Dict[Color, int]): Remaining number of cards each player
discard_counts (List[int]): Color-index aligned number of cards each player
must discard in the current discard sequence.
is_moving_knight (bool): If current player needs to move robber.
is_road_building (bool): If current player needs to build free roads per Road
Expand Down Expand Up @@ -133,7 +133,7 @@ def __init__(
self.current_prompt = ActionPrompt.BUILD_INITIAL_SETTLEMENT
self.is_initial_build_phase = True
self.is_discarding = False
self.discard_counts: Dict[Color, int] = {color: 0 for color in self.colors}
self.discard_counts: List[int] = [0] * len(self.colors)
self.is_moving_knight = False
self.is_road_building = False
self.free_roads_available = 0
Expand Down
6 changes: 3 additions & 3 deletions tests/models/test_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_monopoly_possible_actions():
def test_discard_possibilities_are_per_resource():
player = SimplePlayer(Color.RED)
state = State([player])
state.discard_counts[player.color] = 2
state.discard_counts[0] = 2

player_deck_replenish(state, player.color, WHEAT, 2)
player_deck_replenish(state, player.color, BRICK, 1)
Expand All @@ -84,7 +84,7 @@ def test_discard_possibilities_empty_when_player_does_not_need_to_discard():
def test_discard_possibilities_do_not_repeat_same_resource():
player = SimplePlayer(Color.RED)
state = State([player])
state.discard_counts[player.color] = 3
state.discard_counts[0] = 3

player_deck_replenish(state, player.color, WHEAT, 3)

Expand All @@ -96,7 +96,7 @@ def test_discard_possibilities_do_not_repeat_same_resource():
def test_discard_possibilities_include_each_resource_in_resource_order():
player = SimplePlayer(Color.RED)
state = State([player])
state.discard_counts[player.color] = len(RESOURCES)
state.discard_counts[0] = len(RESOURCES)

for resource in RESOURCES:
player_deck_replenish(state, player.color, resource)
Expand Down
6 changes: 0 additions & 6 deletions tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,6 @@ def test_action_from_json_discard():
assert action.value == WOOD


def test_action_from_json_discard_rejects_list_value():
data = ["BLUE", "DISCARD_RESOURCE", [WOOD]]
with pytest.raises(ValueError, match="Discard action must have 1 resource"):
action_from_json(data)


def test_action_from_json_play_year_of_plenty_invalid():
data = ["WHITE", "PLAY_YEAR_OF_PLENTY", [WOOD, BRICK, SHEEP]]
with pytest.raises(
Expand Down
36 changes: 34 additions & 2 deletions tests/test_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,18 +221,50 @@ def test_discard_sequence_tracks_original_hand_size():
state.current_player_index = current_player_index
state.current_prompt = ActionPrompt.DISCARD
state.is_discarding = True
state.discard_counts[color] = 4
state.discard_counts[current_player_index] = 4
player_deck_replenish(state, color, BRICK, 1)
player_deck_replenish(state, color, WHEAT, 3)

apply_action(state, Action(color, ActionType.DISCARD_RESOURCE, BRICK))
assert state.current_color() == color
assert state.is_discarding
assert state.discard_counts[color] == 3
assert state.discard_counts[current_player_index] == 3

apply_action(state, Action(color, ActionType.DISCARD_RESOURCE, WHEAT))
apply_action(state, Action(color, ActionType.DISCARD_RESOURCE, WHEAT))
apply_action(state, Action(color, ActionType.DISCARD_RESOURCE, WHEAT))
assert not state.is_discarding
assert state.is_moving_knight
assert state.current_player_index == turn_player_index


def test_discard_sequence_advances_to_next_discarder():
players = [
SimplePlayer(Color.RED),
SimplePlayer(Color.BLUE),
SimplePlayer(Color.WHITE),
]
state = State(players)
current_color = players[0].color
next_color = players[2].color
current_player_index = state.colors.index(current_color)
next_player_index = state.colors.index(next_color)

state.current_turn_index = 1
state.current_player_index = current_player_index
state.current_prompt = ActionPrompt.DISCARD
state.is_discarding = True
state.discard_counts[current_player_index] = 1
state.discard_counts[next_player_index] = 2

player_deck_replenish(state, current_color, BRICK, 1)

apply_action(
state,
Action(current_color, ActionType.DISCARD_RESOURCE, BRICK),
)

assert state.current_player_index == next_player_index
assert state.current_color() == next_color
assert state.is_discarding
assert state.current_prompt == ActionPrompt.DISCARD
Loading