Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,6 @@ files/ # Ignore specific directory if generated

# OS generated files
.DS_Store
Thumbs.db
Thumbs.db
.trieye_data
.trieye_data/
8,640 changes: 8,640 additions & 0 deletions .resumed.txt

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@

# File: MANIFEST.in
# File: MANIFEST.in
include README.md
include LICENSE
Expand All @@ -15,12 +14,20 @@ prune alphatriangle/visualization
prune alphatriangle/interaction
# REMOVE MCTS pruning
# prune alphatriangle/mcts
# Remove Trieye-replaced directories
prune alphatriangle/stats
prune alphatriangle/data
# Remove pruned files
global-exclude alphatriangle/app.py
# Remove pruned test directories
prune tests/visualization
prune tests/interaction
# REMOVE MCTS test pruning
# prune tests/mcts
# Remove Trieye-replaced test directories
prune tests/stats
prune tests/data
# Remove pruned core files
global-exclude alphatriangle/rl/core/visual_state_actor.py
global-exclude alphatriangle/rl/core/visual_state_actor.py
# REMOVE test_save_resume.py
global-exclude tests/training/test_save_resume.py
122 changes: 64 additions & 58 deletions README.md

Large diffs are not rendered by default.

118 changes: 79 additions & 39 deletions alphatriangle/cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# File: alphatriangle/cli.py
import logging
import shutil
import subprocess
Expand All @@ -8,9 +9,12 @@
from rich.console import Console
from rich.panel import Panel

# Import Trieye config
from trieye import PersistenceConfig, TrieyeConfig

# Import alphatriangle specific configs and runner
from alphatriangle.config import (
PersistenceConfig,
APP_NAME, # Use APP_NAME from config
TrainConfig,
)
from alphatriangle.logging_config import setup_logging # Import centralized setup
Expand Down Expand Up @@ -61,6 +65,15 @@
),
]

RunNameOption = Annotated[
str | None,
typer.Option(
"--run-name",
help="Specify a custom name for the run (overrides default timestamp).",
),
]


HostOption = Annotated[
str, typer.Option(help="The network address to listen on (default: 127.0.0.1).")
]
Expand Down Expand Up @@ -106,7 +119,8 @@ def _run_external_ui(
console.print(
f"[bold red]Error:[/bold red] {ui_name} command failed with exit code {process.returncode}"
)
raise typer.Exit(code=process.returncode)
# Don't exit immediately, let the calling function handle it if needed
# raise typer.Exit(code=process.returncode)
except FileNotFoundError as e:
console.print(
f"[bold red]Error:[/bold red] '{executable}' command not found. Is {ui_name} installed and in your PATH?"
Expand All @@ -129,54 +143,64 @@ def train(
log_level: LogLevelOption = "INFO",
seed: SeedOption = 42,
profile: ProfileOption = False,
run_name: RunNameOption = None, # Add run_name option
):
"""
🚀 Run the AlphaTriangle training pipeline (headless).

Initiates the self-play and learning process. Logs will be saved to the run directory.
Initiates the self-play and learning process. Uses Trieye for stats/persistence.
Logs will be saved to the run directory within `.trieye_data/alphatriangle/runs/`.
This command also initializes Ray and starts the Ray Dashboard. Check the logs for the dashboard URL.
"""
# Setup logging using the centralized function (file logging handled by runner)
# Setup logging using the centralized function (file logging handled by Trieye)
setup_logging(log_level)
logging.getLogger(__name__) # Get logger after setup

# Use alphatriangle configs here
# Use alphatriangle TrainConfig
train_config_override = TrainConfig()
persist_config_override = PersistenceConfig()
train_config_override.RANDOM_SEED = seed
train_config_override.PROFILE_WORKERS = profile # Set profile config
# Ensure run name is set for persistence config
persist_config_override.RUN_NAME = train_config_override.RUN_NAME

# Create TrieyeConfig, overriding run_name if provided
trieye_config_override = TrieyeConfig(app_name=APP_NAME)
if run_name:
trieye_config_override.run_name = run_name
# Sync run_name to persistence config within TrieyeConfig
trieye_config_override.persistence.RUN_NAME = run_name
else:
# Use the default factory-generated run_name from TrieyeConfig
run_name = trieye_config_override.run_name

console.print(
Panel(
f"Starting Training Run: '[bold cyan]{train_config_override.RUN_NAME}[/]'\n"
f"Starting Training Run: '[bold cyan]{run_name}[/]'\n"
f"Seed: {seed}, Log Level: {log_level.upper()}, Profiling: {'✅ Enabled' if profile else '❌ Disabled'}",
title="[bold green]Training Setup[/]",
border_style="green",
expand=False,
)
)

# Call the single runner function directly, passing the profile flag
# Call the single runner function directly, passing configs
exit_code = run_training(
log_level_str=log_level,
train_config_override=train_config_override,
persist_config_override=persist_config_override,
trieye_config_override=trieye_config_override,
profile=profile,
)

if exit_code == 0:
console.print(
Panel(
f"✅ Training run '[bold cyan]{train_config_override.RUN_NAME}[/]' completed successfully.",
f"✅ Training run '[bold cyan]{run_name}[/]' completed successfully.",
title="[bold green]Training Finished[/]",
border_style="green",
)
)
else:
console.print(
Panel(
f"❌ Training run '[bold cyan]{train_config_override.RUN_NAME}[/]' failed with exit code {exit_code}.",
f"❌ Training run '[bold cyan]{run_name}[/]' failed with exit code {exit_code}.",
title="[bold red]Training Failed[/]",
border_style="red",
)
Expand All @@ -192,11 +216,11 @@ def ml(
"""
📊 Launch the MLflow UI for experiment tracking.

Requires MLflow to be installed. Points to the `.alphatriangle_data/mlruns` directory.
Requires MLflow to be installed. Points to the `.trieye_data/<app_name>/mlruns` directory.
"""
setup_logging("INFO") # Basic logging for this command
persist_config = PersistenceConfig()
# Use the computed property which resolves the path and creates the dir
# Use Trieye's PersistenceConfig to find the path
persist_config = PersistenceConfig(APP_NAME=APP_NAME)
mlflow_uri = persist_config.MLFLOW_TRACKING_URI
mlflow_path = persist_config.get_mlflow_abs_path()

Expand All @@ -217,7 +241,15 @@ def ml(
"--port",
str(port),
]
_run_external_ui("mlflow", command_args, "MLflow UI", f"http://{host}:{port}")
try:
_run_external_ui("mlflow", command_args, "MLflow UI", f"http://{host}:{port}")
except typer.Exit as e:
if e.exit_code != 0:
console.print(
f"[yellow]MLflow UI failed to start (Exit Code: {e.exit_code}). "
f"Is port {port} already in use? Try specifying a different port with --port.[/]"
)
sys.exit(e.exit_code)


@app.command()
Expand All @@ -228,11 +260,11 @@ def tb(
"""
📈 Launch TensorBoard UI pointing to the runs directory.

Requires TensorBoard to be installed. Points to the `.alphatriangle_data/runs` directory.
Requires TensorBoard to be installed. Points to the `.trieye_data/<app_name>/runs` directory.
"""
setup_logging("INFO") # Basic logging for this command
persist_config = PersistenceConfig()
# Point to the parent directory containing all individual run folders
# Use Trieye's PersistenceConfig to find the path
persist_config = PersistenceConfig(APP_NAME=APP_NAME)
runs_root_dir = persist_config.get_runs_root_dir()

if not runs_root_dir.exists() or not any(runs_root_dir.iterdir()):
Expand All @@ -253,38 +285,46 @@ def tb(
"--port",
str(port),
]
_run_external_ui(
"tensorboard", command_args, "TensorBoard UI", f"http://{host}:{port}"
)
try:
_run_external_ui(
"tensorboard", command_args, "TensorBoard UI", f"http://{host}:{port}"
)
except typer.Exit as e:
if e.exit_code != 0:
console.print(
f"[yellow]TensorBoard UI failed to start (Exit Code: {e.exit_code}). "
f"Is port {port} already in use? Try specifying a different port with --port.[/]"
)
sys.exit(e.exit_code)


@app.command()
def ray(
host: HostOption = "127.0.0.1",
host: HostOption = "127.0.0.1", # Keep host/port options for reference
port: PortOption = 8265,
):
"""
☀️ Launch the Ray Dashboard web UI.
☀️ Provides instructions to view the Ray Dashboard.

Requires Ray to be installed and potentially running (e.g., started by `alphatriangle train`).
The dashboard is automatically started when you run `alphatriangle train`.
Check the output logs of the `train` command for the correct URL.
"""
setup_logging("INFO") # Basic logging for this command
console.print(
"[yellow]Note:[/yellow] This command attempts to open the Ray Dashboard for an existing Ray cluster."
)
console.print(
"If Ray is not running, this command might fail. Start Ray first (e.g., via `alphatriangle train`)."
Panel(
f"💡 To view the Ray Dashboard:\n\n"
f"1. The Ray Dashboard is started automatically when you run the `[bold]alphatriangle train[/]` command.\n"
f"2. Check the console output or the log file for the `train` command (located in `.trieye_data/{APP_NAME}/runs/<run_name>/logs/`).\n"
f"3. Look for a line similar to: '[bold cyan]Ray Dashboard running at: http://<address>:<port>[/]' \n"
f"4. Open that specific URL in your web browser.\n\n"
f"[dim]Note: The default URL is often http://{host}:{port}, but it might differ. "
f"If you cannot access the URL, check firewall settings or if the port is blocked.[/]",
title="[bold yellow]Ray Dashboard Instructions[/]",
border_style="yellow",
expand=False,
)
)

command_args = [
"dashboard",
"--host",
host,
"--port",
str(port),
]
_run_external_ui("ray", command_args, "Ray Dashboard", f"http://{host}:{port}")


if __name__ == "__main__":
app()
22 changes: 9 additions & 13 deletions alphatriangle/config/README.md
Original file line number Diff line number Diff line change
@@ -1,33 +1,29 @@

# Configuration Module (`alphatriangle.config`)

## Purpose and Architecture

This module centralizes all configuration parameters for the AlphaTriangle project *except* for the core environment settings. It uses separate **Pydantic models** for different aspects of the application (model, training, persistence, MCTS, **statistics**) to promote modularity, clarity, and automatic validation.
This module centralizes configuration parameters for the AlphaTriangle agent itself, *excluding* statistics logging and data persistence, which are now handled by the `trieye` library. It uses separate **Pydantic models** for different aspects of the agent (model, training loop, MCTS) to promote modularity, clarity, and automatic validation.

**Core environment configuration (`EnvConfig`) is now defined and imported directly from the `trianglengin` library.**
**Core environment configuration (`EnvConfig`) is imported directly from the `trianglengin` library.**
**Statistics and Persistence configurations (`StatsConfig`, `PersistenceConfig`) are defined and managed within the `trieye` library via `TrieyeConfig`.**

- **Modularity:** Separating configurations makes it easier to manage parameters for different components.
- **Type Safety & Validation:** Using Pydantic models (`BaseModel`) provides strong type hinting, automatic parsing, and validation of configuration values based on defined types and constraints (e.g., `Field(gt=0)`).
- **Validation Script:** The [`validation.py`](validation.py) script instantiates all configuration models (including importing and validating `trianglengin.EnvConfig`), triggering Pydantic's validation, and prints a summary.
- **Dynamic Defaults:** Some configurations, like `RUN_NAME` in `TrainConfig`, use `default_factory` for dynamic defaults (e.g., timestamp).
- **Computed Fields:** Properties like `MLFLOW_TRACKING_URI` in `PersistenceConfig` are defined using `@computed_field` for clarity.
- **Tuned Defaults:** The default values in `TrainConfig` and `ModelConfig` are tuned for substantial learning runs. `AlphaTriangleMCTSConfig` defaults to 128 simulations. `StatsConfig` defines a default set of metrics to track.
- **Data Paths:** `PersistenceConfig` defines the structure within the `.alphatriangle_data` directory where all local artifacts (runs, checkpoints, logs, TensorBoard data) and MLflow data (`mlruns`) are stored.
- **Validation Script:** The [`validation.py`](validation.py) script instantiates the AlphaTriangle-specific configuration models (including importing and validating `trianglengin.EnvConfig`), triggering Pydantic's validation, and prints a summary. **Note:** It does *not* validate `TrieyeConfig` directly; `trieye` handles its own validation upon actor initialization.
- **Dynamic Defaults:** Some configurations, like `RUN_NAME` in `TrainConfig`, use `default_factory` for dynamic defaults (e.g., timestamp). This default is often overridden by the `run_name` set in `TrieyeConfig`.
- **Tuned Defaults:** The default values in `TrainConfig` and `ModelConfig` are tuned for substantial learning runs. `AlphaTriangleMCTSConfig` defaults to 128 simulations.

## Exposed Interfaces

- **Pydantic Models:**
- `EnvConfig` (Imported from `trianglengin`): Environment parameters (grid size, shapes, rewards).
- [`ModelConfig`](model_config.py): Neural network architecture parameters.
- [`TrainConfig`](train_config.py): Training loop hyperparameters (batch size, learning rate, workers, PER settings, etc.).
- [`PersistenceConfig`](persistence_config.py): Data saving/loading parameters (directories within `.alphatriangle_data`, filenames).
- [`AlphaTriangleMCTSConfig`](mcts_config.py): MCTS parameters (simulations, exploration constants, temperature).
- [`StatsConfig`](stats_config.py): Statistics collection and logging parameters (metrics, aggregation, frequency).
- **Constants:**
- [`APP_NAME`](app_config.py): The name of the application.
- [`APP_NAME`](app_config.py): The name of the application (used by `trieye` for namespacing).
- **Functions:**
- `print_config_info_and_validate(mcts_config_instance: AlphaTriangleMCTSConfig | None)`: Validates and prints a summary of all configurations.
- `print_config_info_and_validate(mcts_config_instance: AlphaTriangleMCTSConfig | None)`: Validates and prints a summary of AlphaTriangle-specific configurations.

## Dependencies

Expand All @@ -40,4 +36,4 @@ This module primarily defines configurations and relies heavily on **Pydantic**.

---

**Note:** Please keep this README updated when adding, removing, or significantly modifying configuration parameters or the structure of the Pydantic models. Accurate documentation is crucial for maintainability.
**Note:** Please keep this README updated when adding, removing, or significantly modifying configuration parameters or the structure of the Pydantic models within this module.
4 changes: 0 additions & 4 deletions alphatriangle/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,14 @@
from .app_config import APP_NAME
from .mcts_config import AlphaTriangleMCTSConfig
from .model_config import ModelConfig
from .persistence_config import PersistenceConfig
from .stats_config import StatsConfig # ADDED
from .train_config import TrainConfig
from .validation import print_config_info_and_validate

__all__ = [
"APP_NAME",
"EnvConfig",
"ModelConfig",
"PersistenceConfig",
"TrainConfig",
"AlphaTriangleMCTSConfig",
"StatsConfig", # ADDED
"print_config_info_and_validate",
]
4 changes: 2 additions & 2 deletions alphatriangle/config/mcts_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from trimcts import SearchConfiguration # Import base config for reference

# Restore default simulations to a lower value for faster testing/profiling
DEFAULT_MAX_SIMULATIONS = 128
DEFAULT_MAX_DEPTH = 16
DEFAULT_MAX_SIMULATIONS = 64
DEFAULT_MAX_DEPTH = 8
DEFAULT_CPUCT = 1.5
DEFAULT_MCTS_BATCH_SIZE = 32 # Default batch size for network evals within MCTS

Expand Down
Loading
Loading