diff --git a/catanatron/catanatron/players/README_LLM_PLAYER.md b/catanatron/catanatron/players/README_LLM_PLAYER.md new file mode 100644 index 000000000..13e8c5edd --- /dev/null +++ b/catanatron/catanatron/players/README_LLM_PLAYER.md @@ -0,0 +1,170 @@ +# LLM Player + +The `LLMPlayer` is a Catan AI player that uses Large Language Models (LLMs) via Ollama to make game decisions. It sends the current game state and available actions to a local LLM instance and parses the response to select an action. + +## Features + +- **Network-based LLM calls**: Makes HTTP requests to a local Ollama instance for each decision +- **Templated prompts**: Uses a customizable markdown template with LangChain for prompt formatting +- **Simplified state representation**: Shows only essential game statistics to the LLM +- **Robust parsing**: Best-effort parsing of LLM responses with fallback to safe defaults + +## Prerequisites + +### 1. Install Ollama + +Download and install Ollama from [https://ollama.ai/](https://ollama.ai/) + +### 2. Install Python Dependencies + +```bash +pip install langchain langchain-ollama +``` + +### 3. Pull an LLM Model + +```bash +# Pull llama3.2 (recommended, fast and good quality) +ollama pull llama3.2 + +# Or try other models: +ollama pull mistral +ollama pull llama3.1 +``` + +### 4. Start Ollama Server + +```bash +ollama serve +``` + +The server will run on `http://localhost:11434` by default. 
+ +## Usage + +### Basic Usage + +```python +from catanatron import Game, RandomPlayer, Color +from catanatron.players.llm import LLMPlayer + +# Create an LLM player +llm_player = LLMPlayer(Color.RED) + +# Play against random opponents +players = [ + llm_player, + RandomPlayer(Color.BLUE), + RandomPlayer(Color.WHITE), + RandomPlayer(Color.ORANGE), +] + +game = Game(players) +winner = game.play() +print(f"Winner: {winner}") +``` + +### Custom Configuration + +```python +# Use a different model and Ollama URL +llm_player = LLMPlayer( + color=Color.RED, + model_name="mistral", # Change the model + ollama_base_url="http://localhost:11434" # Custom Ollama URL +) +``` + +## How It Works + +### 1. State Extraction + +On each decision, the player extracts simplified game state: +- Your stats: victory points, resources, dev cards, buildings available, bonuses +- Opponent stats: victory points, total resources, bonuses +- Game info: current turn, robber status + +### 2. Prompt Formatting + +The state is formatted using the template in `llm_player_prompt.md`: +- Shows your color and stats +- Lists opponents +- Numbers all available actions (0, 1, 2, ...) +- Asks LLM to respond with just the action number + +### 3. LLM Call + +The formatted prompt is sent to Ollama via HTTP request using LangChain. + +### 4. Response Parsing + +The LLM response is parsed to extract the action number: +- Looks for digits in the response +- Validates the number is in the valid range +- Falls back to action 0 if parsing fails + +## Customizing the Prompt + +You can customize the decision-making prompt by editing: +``` +catanatron/catanatron/players/llm_player_prompt.md +``` + +The template uses LangChain's `PromptTemplate` format with variables like: +- `{color}` - Player's color +- `{your_vp}` - Victory points +- `{your_wood}`, `{your_brick}`, etc. - Resource counts +- `{actions_list}` - Numbered list of available actions +- And more... 
+ +## Performance Considerations + +- **Latency**: Each decision requires a network call to Ollama, which can take 1-10 seconds depending on the model +- **Model choice**: + - `llama3.2` - Good balance of speed and quality + - `mistral` - Faster, good for testing + - `llama3.1` - Slower but higher quality reasoning +- **Temperature**: Set to 0.7 by default for balanced exploration/exploitation + +## Troubleshooting + +### "ImportError: langchain is required" +Install dependencies: +```bash +pip install langchain langchain-ollama +``` + +### "Connection refused" or network errors +1. Check Ollama is running: `ollama serve` +2. Verify the URL: `curl http://localhost:11434` +3. Check firewall settings + +### LLM returns invalid responses +The player has fallback logic: +- Logs a warning message +- Defaults to action 0 (first available action) +- Game continues without crashing + +### Slow performance +- Use a smaller/faster model (e.g., `llama3.2` instead of `llama3.1`) +- Check Ollama's GPU acceleration is working +- Consider using a quantized model + +## Example + +See `examples/llm_player_example.py` for a complete working example. 
"""LLM-based player that uses Ollama for decision making."""
import re
from pathlib import Path

from catanatron.models.player import Player


class LLMPlayer(Player):
    """Player that delegates decisions to a local Ollama LLM.

    On each turn the simplified game state and the numbered list of playable
    actions are rendered into a prompt (template: ``llm_player_prompt.md``),
    sent to a local Ollama instance over HTTP via LangChain, and the reply is
    parsed for the chosen action index. Any failure (network, parsing, missing
    dependencies at decision time) falls back to the first playable action so
    the game never crashes because of the LLM.
    """

    def __init__(self, color, model_name="llama3.2", ollama_base_url="http://localhost:11434", is_bot=True):
        """Initialize the LLM player.

        Args:
            color (Color): The player's color.
            model_name (str): Name of the Ollama model to use (default: llama3.2).
            ollama_base_url (str): Base URL for the Ollama API
                (default: http://localhost:11434).
            is_bot (bool): Whether this is a bot player.
        """
        super().__init__(color, is_bot)
        self.model_name = model_name
        self.ollama_base_url = ollama_base_url
        # Lazily created and cached across turns so the template file is read
        # and the LLM client constructed only once per game.
        self._prompt_template = None
        self._llm = None

    def _get_prompt_template(self):
        """Load (and cache) the prompt template from the sibling markdown file.

        Returns:
            PromptTemplate: LangChain template built from ``llm_player_prompt.md``.

        Raises:
            ImportError: If langchain is not installed.
        """
        if self._prompt_template is None:
            try:
                from langchain_core.prompts import PromptTemplate

                template_path = Path(__file__).parent / "llm_player_prompt.md"
                with open(template_path, "r") as f:
                    template_content = f.read()

                self._prompt_template = PromptTemplate.from_template(template_content)
            except ImportError:
                raise ImportError(
                    "langchain is required for LLMPlayer. "
                    "Install it with: pip install langchain langchain-ollama"
                )
        return self._prompt_template

    def _get_llm(self):
        """Get or create the cached Ollama LLM client.

        Returns:
            OllamaLLM: Configured LangChain Ollama client.

        Raises:
            ImportError: If langchain-ollama is not installed.
        """
        if self._llm is None:
            try:
                from langchain_ollama import OllamaLLM

                self._llm = OllamaLLM(
                    model=self.model_name,
                    base_url=self.ollama_base_url,
                    # 0.7 balances exploration vs. determinism (documented in
                    # README_LLM_PLAYER.md); keep in sync if changed.
                    temperature=0.7,
                )
            except ImportError:
                raise ImportError(
                    "langchain-ollama is required for LLMPlayer. "
                    "Install it with: pip install langchain-ollama"
                )
        return self._llm

    def _describe_node_location(self, board, node_id):
        """Describe a node location by its adjacent tiles.

        Args:
            board: The game board.
            node_id: The node ID to describe.

        Returns:
            str: Description like "3 WHEAT - 5 ORE - 6 BRICK", or "Unknown"
            if the node has no adjacent tiles.
        """
        adjacent_tiles = board.map.adjacent_tiles.get(node_id, [])

        tile_descriptions = []
        for tile in adjacent_tiles:
            # A resource of None marks the desert tile (it also has no number).
            if tile.resource is None:
                tile_descriptions.append(f"{tile.number or 0} DESERT")
            else:
                tile_descriptions.append(f"{tile.number or 0} {tile.resource}")

        # Sort so the same node always renders identically in the prompt.
        tile_descriptions.sort()

        return " - ".join(tile_descriptions) if tile_descriptions else "Unknown"

    def _extract_state_info(self, game):
        """Extract simplified, prompt-ready state information from the game.

        Only public information is extracted for opponents (hand *sizes*,
        victory points, bonuses) while full details are extracted for this
        player.

        Args:
            game: The game instance.

        Returns:
            dict: Template variables consumed by ``llm_player_prompt.md``.
        """
        state = game.state
        color = self.color

        # Player stats live in a flat dict keyed like "P0_VICTORY_POINTS".
        player_key_prefix = f"P{state.colors.index(color)}_"

        def get_player_stat(stat_name):
            return state.player_state.get(f"{player_key_prefix}{stat_name}", 0)

        # Expand resource counts into a flat hand list, e.g. ['WOOD', 'WOOD', 'ORE'].
        resource_hand = []
        for resource, stat_name in [
            ("WOOD", "WOOD_IN_HAND"),
            ("BRICK", "BRICK_IN_HAND"),
            ("SHEEP", "SHEEP_IN_HAND"),
            ("WHEAT", "WHEAT_IN_HAND"),
            ("ORE", "ORE_IN_HAND"),
        ]:
            resource_hand.extend([resource] * get_player_stat(stat_name))

        # Dev cards still in hand (playable).
        unused_dev_cards = []
        for card, stat_name in [
            ("KNIGHT", "KNIGHT_IN_HAND"),
            ("YEAR_OF_PLENTY", "YEAR_OF_PLENTY_IN_HAND"),
            ("MONOPOLY", "MONOPOLY_IN_HAND"),
            ("ROAD_BUILDING", "ROAD_BUILDING_IN_HAND"),
            ("VICTORY_POINT", "VICTORY_POINT_IN_HAND"),
        ]:
            unused_dev_cards.extend([card] * get_player_stat(stat_name))

        # Dev cards already played (VP cards are never "played" so not listed).
        used_dev_cards = []
        for card, stat_name in [
            ("KNIGHT", "PLAYED_KNIGHT"),
            ("YEAR_OF_PLENTY", "PLAYED_YEAR_OF_PLENTY"),
            ("MONOPOLY", "PLAYED_MONOPOLY"),
            ("ROAD_BUILDING", "PLAYED_ROAD_BUILDING"),
        ]:
            used_dev_cards.extend([card] * get_player_stat(stat_name))

        # This player's buildings with human-readable locations.
        buildings_info = []
        for node_id, (building_color, building_type) in state.board.buildings.items():
            if building_color == color:
                location_desc = self._describe_node_location(state.board, node_id)
                # FIX: building type is stored as the string constants
                # "SETTLEMENT"/"CITY", not as an int; the previous comparison
                # (building_type == 1) was never true, mislabeling every
                # settlement as a city.
                building_name = "Settlement" if building_type == "SETTLEMENT" else "City"
                buildings_info.append(f"- {building_name} at {location_desc}")

        info = {
            "color": color.value,
            "your_vp": get_player_stat("VICTORY_POINTS"),
            "your_hand": str(resource_hand),
            "your_unused_dev_cards": str(unused_dev_cards),
            "your_used_dev_cards": str(used_dev_cards),
            "your_buildings": "\n".join(buildings_info) if buildings_info else "None",
            "your_roads_available": get_player_stat("ROADS_AVAILABLE"),
            "your_settlements_available": get_player_stat("SETTLEMENTS_AVAILABLE"),
            "your_cities_available": get_player_stat("CITIES_AVAILABLE"),
            "has_longest_road": "Yes" if get_player_stat("HAS_ROAD") else "No",
            "has_largest_army": "Yes" if get_player_stat("HAS_ARMY") else "No",
            "current_turn": state.num_turns,
            # FIX: hasattr(state.board, 'robber_coordinate') is always true
            # (the attribute always exists), so the old check reported "Yes"
            # unconditionally. Check the value instead.
            "robber_active": "Yes" if getattr(state.board, "robber_coordinate", None) is not None else "No",
        }

        # Opponent stats: only public information (hand sizes, VP, bonuses).
        opponents_info = []
        for i, opponent_color in enumerate(state.colors):
            if opponent_color == color:
                continue

            opp_prefix = f"P{i}_"
            opp_vp = state.player_state.get(f"{opp_prefix}VICTORY_POINTS", 0)

            # Total resource cards held (hand size is visible to all players).
            opp_resource_cards = sum(
                state.player_state.get(f"{opp_prefix}{res}_IN_HAND", 0)
                for res in ("WOOD", "BRICK", "SHEEP", "WHEAT", "ORE")
            )

            # Total dev cards held (count is public, contents are not).
            opp_dev_cards = sum(
                state.player_state.get(f"{opp_prefix}{card}_IN_HAND", 0)
                for card in (
                    "KNIGHT",
                    "YEAR_OF_PLENTY",
                    "MONOPOLY",
                    "ROAD_BUILDING",
                    "VICTORY_POINT",
                )
            )

            opp_has_longest = "Yes" if state.player_state.get(f"{opp_prefix}HAS_ROAD", False) else "No"
            opp_has_largest = "Yes" if state.player_state.get(f"{opp_prefix}HAS_ARMY", False) else "No"

            opponents_info.append(
                f"- {opponent_color.value}: {opp_vp} VP, {opp_resource_cards} resource cards, "
                f"{opp_dev_cards} dev cards, Longest Road: {opp_has_longest}, Largest Army: {opp_has_largest}"
            )

        info["opponents_stats"] = "\n".join(opponents_info)

        return info

    def _format_actions(self, playable_actions):
        """Format playable actions as a numbered list for the prompt.

        Args:
            playable_actions: List of Action objects.

        Returns:
            str: Newline-joined list like "0. BUILD_ROAD (value: (3, 4))".
        """
        actions_list = []
        for i, action in enumerate(playable_actions):
            action_desc = f"{action.action_type.value}"
            # Omit empty payloads to keep the prompt short.
            if action.value is not None and action.value != ():
                action_desc += f" (value: {action.value})"
            actions_list.append(f"{i}. {action_desc}")

        return "\n".join(actions_list)

    def _parse_llm_response(self, response_text, num_actions):
        """Parse the LLM response to extract an action index.

        Scans the response for standalone integers and returns the first one
        that is a valid index. (Previously only the very first number was
        considered, so a response like "I pick option 12 of 3" with a valid
        number later would still fall back to 0.)

        Args:
            response_text (str): Raw LLM response.
            num_actions (int): Number of available actions.

        Returns:
            int: Parsed action index, or 0 if no valid index is found.
        """
        for token in re.findall(r"\b(\d+)\b", response_text.strip()):
            action_idx = int(token)
            if 0 <= action_idx < num_actions:
                return action_idx

        # Fallback: keep the game moving with the first (always legal) action.
        print(f"Warning: Could not parse LLM response '{response_text}'. Defaulting to action 0.")
        return 0

    def decide(self, game, playable_actions):
        """Use the LLM to decide which action to take.

        Args:
            game: The game instance.
            playable_actions: List of available actions (always non-empty).

        Returns:
            Action: The chosen action; on any error, the first playable action.
        """
        try:
            state_info = self._extract_state_info(game)

            state_info["actions_list"] = self._format_actions(playable_actions)
            state_info["max_action_index"] = len(playable_actions) - 1

            prompt_template = self._get_prompt_template()
            llm = self._get_llm()

            formatted_prompt = prompt_template.format(**state_info)

            # Network call to the local Ollama instance (can take seconds).
            response = llm.invoke(formatted_prompt)

            action_idx = self._parse_llm_response(response, len(playable_actions))

            return playable_actions[action_idx]

        except Exception as e:
            # Deliberately broad: an LLM/network failure must never crash the
            # game loop; degrade to the first legal action instead.
            print(f"Error in LLMPlayer.decide: {e}")
            print(f"Falling back to first action")
            return playable_actions[0]
Your goal is to make the best strategic decision based on the current game state. + +## Your Color +{color} + +## Game State Summary + +### Your Hand & Cards +- Victory Points: {your_vp} +- Your Hand: {your_hand} +- Unused Dev Cards: {your_unused_dev_cards} +- Used Dev Cards: {your_used_dev_cards} + +### Your Buildings +{your_buildings} + +### Buildings Available to Build +- Roads: {your_roads_available} +- Settlements: {your_settlements_available} +- Cities: {your_cities_available} + +### Bonuses +- Has Longest Road: {has_longest_road} +- Has Largest Army: {has_largest_army} + +### Opponents +{opponents_stats} + +### Game Info +- Current Turn: {current_turn} +- Robber Active: {robber_active} + +## Available Actions + +{actions_list} + +## Your Task + +Analyze the game state and available actions. Choose the action that gives you the best strategic advantage to win the game (reach 10 victory points). + +**Respond with ONLY the number of the action you choose (0-{max_action_index}). Do not include any explanation or additional text.** + +Your response: diff --git a/examples/llm_player_example.py b/examples/llm_player_example.py new file mode 100644 index 000000000..c8fdec5a7 --- /dev/null +++ b/examples/llm_player_example.py @@ -0,0 +1,77 @@ +""" +Example of using LLMPlayer with a local Ollama instance. + +This example demonstrates how to use the LLMPlayer that makes decisions +by calling a local Ollama LLM instance via network. + +Prerequisites: +1. Install Ollama: https://ollama.ai/ +2. Install required dependencies: + pip install langchain langchain-ollama +3. Pull a model (e.g., llama3.2): + ollama pull llama3.2 +4. 
from catanatron import Game, RandomPlayer, Color
from catanatron.players.llm import LLMPlayer


def main():
    """Play one game: an LLM-driven RED player versus three random bots."""
    # One Ollama-backed player plus three baseline random opponents.
    roster = [
        LLMPlayer(Color.RED, model_name="llama3.2"),  # Uses local Ollama
        RandomPlayer(Color.BLUE),
        RandomPlayer(Color.WHITE),
        RandomPlayer(Color.ORANGE),
    ]

    print("Starting game with LLM player as RED...")
    print("This will make network calls to Ollama for each decision.\n")

    match = Game(roster)
    victor = match.play()

    print(f"\nGame finished! Winner: {victor}")

    # Report each player's final victory-point total.
    print("\nFinal Scores:")
    for seat, color in enumerate(match.state.colors):
        points = match.state.player_state.get(f"P{seat}_VICTORY_POINTS", 0)
        print(f"  {color.value}: {points} VP")


def main_custom_config():
    """Play one game using a non-default model and explicit Ollama URL."""
    ollama_bot = LLMPlayer(
        color=Color.RED,
        model_name="mistral",  # Use a different model
        ollama_base_url="http://localhost:11434",  # Custom Ollama URL
    )

    roster = [
        ollama_bot,
        RandomPlayer(Color.BLUE),
        RandomPlayer(Color.WHITE),
        RandomPlayer(Color.ORANGE),
    ]

    match = Game(roster)
    victor = match.play()

    print(f"Winner: {victor}")


if __name__ == "__main__":
    # Run the basic example.
    main()

    # Uncomment to run the custom config example:
    # main_custom_config()