From f3cfff6df4bb7b6ac037447d465735b5837d71f0 Mon Sep 17 00:00:00 2001 From: Marcus Castro Date: Tue, 24 Feb 2026 11:49:21 -0300 Subject: [PATCH 1/3] chore: clean up artifacts and extend .gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 92732d8..2c689ad 100644 --- a/.gitignore +++ b/.gitignore @@ -148,3 +148,8 @@ CLAUDE.md # Temporary files tmp_*.html tmp_*.json + +# Personal/temporary artifacts +*.pdf +*.zip +pdf_parts/ From df4f119744f2dbae372ddb96bb582df6790ddd7f Mon Sep 17 00:00:00 2001 From: Marcus Castro Date: Tue, 24 Feb 2026 11:49:30 -0300 Subject: [PATCH 2/3] feat(web): add --verbose debug logging, fix headless mode and experiment loading - Add structured logging module (spkmc/web/logging.py) with --verbose support - Fix headless mode for non-interactive environments - Fix experiment auto-loading on dashboard - Stop silently swallowing experiment loading errors in ExperimentManager - Update architecture and usage docs --- docs/architecture.md | 84 ++++++++++++++++++++++++++++++++++- docs/gpu_integration_plan.md | 63 -------------------------- docs/usage.md | 75 +++++++++++++++++++++++++++++++ spkmc/io/experiments.py | 27 ++++++++++- spkmc/web/app.py | 10 ++++- spkmc/web/config.py | 36 ++++++++++++++- spkmc/web/logging.py | 75 +++++++++++++++++++++++++++++++ spkmc/web/pages/dashboard.py | 19 +++++++- spkmc/web/runner.py | 14 ++++++ spkmc/web/state.py | 25 ++++++++--- tests/test_web/test_config.py | 15 ++++++- 11 files changed, 365 insertions(+), 78 deletions(-) delete mode 100644 docs/gpu_integration_plan.md create mode 100644 spkmc/web/logging.py diff --git a/docs/architecture.md b/docs/architecture.md index b298737..218a00d 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -26,9 +26,24 @@ spkmc/ ├── utils/ # Utilities │ ├── __init__.py │ └── numba_utils.py # Numba-optimized functions -└── visualization/ # Visualization module +├── visualization/ # Visualization module +│ ├── 
__init__.py +│ └── plots.py # Visualization functions +└── web/ # Web interface module (Streamlit) ├── __init__.py - └── plots.py # Visualization functions + ├── app.py # Entry point, routing, sidebar navigation + ├── config.py # JSON preferences and Streamlit secrets + ├── state.py # Typed session state accessors with URL persistence + ├── styles.py # CSS design system (teal palette, Plus Jakarta Sans) + ├── components.py # Reusable form components + ├── plotting.py # Plotly interactive figure builders + ├── runner.py # Subprocess simulation runner + ├── analysis_runner.py # AI analysis subprocess runner + ├── logging.py # Structured debug logging + └── pages/ # Page modules + ├── dashboard.py # Experiments list with live polling + ├── experiment_detail.py # Scenario cards, charts, comparison + └── settings.py # Preferences and AI configuration ``` ## Main Components @@ -117,8 +132,62 @@ This file contains CLI parameter validators: - `validate_network_type`: Validate the network type. - `validate_distribution_type`: Validate the distribution type. +### `web` Module + +The `web` module provides an interactive browser-based interface built with Streamlit, serving as an alternative to the CLI for running simulations and managing experiments. + +#### `app.py` + +Entry point for the web interface. Handles page routing and renders the sidebar navigation with experiment discovery from the `experiments/` folder. + +#### `config.py` + +Manages user preferences persisted as a JSON file and integrates with Streamlit secrets for sensitive configuration such as AI API keys. + +#### `state.py` + +Provides typed accessors for Streamlit session state. Supports URL-based persistence so that navigation state (selected experiment, active page) survives page reloads. + +#### `styles.py` + +Defines the CSS design system applied globally across the interface. Uses a teal color palette with Plus Jakarta Sans typography, dark sidebar, and clean white card layouts. 
+ +#### `components.py` + +Reusable form components shared across pages, including network parameter forms, distribution parameter forms, and simulation configuration inputs. + +#### `plotting.py` + +Builds interactive Plotly figures for SIR curves, scenario comparisons, and summary statistics. Produces figures consistent with the design system colors. + +#### `runner.py` + +Executes SPKMC simulations as subprocesses, streaming progress updates back to the interface. Decouples simulation execution from the Streamlit event loop. + +#### `analysis_runner.py` + +Runs AI-powered analysis of simulation results as a subprocess. Integrates with configured AI providers to generate interpretive summaries of experiment outcomes. + +#### `logging.py` + +Structured debug logging for the web interface. Activated via the `--verbose` flag on the `spkmc web` command and useful for diagnosing runtime issues. + +#### `pages/dashboard.py` + +The main landing page. Displays experiment cards with summary statistics, supports creating new experiments via a modal dialog, and uses live polling to reflect filesystem changes. + +#### `pages/experiment_detail.py` + +Detail view for a single experiment. Shows scenario cards with parameter summaries, interactive SIR charts, cross-scenario comparison plots, and data export options. + +#### `pages/settings.py` + +Configuration page for user preferences (default simulation parameters, display options) and AI provider settings (API keys, model selection). + ## Execution Flow +### CLI and Programmatic Usage + 1. The user creates a distribution instance (`GammaDistribution` or `ExponentialDistribution`). 2. The user creates an `SPKMC` instance with the distribution. 3. The user creates a network using `NetworkFactory`. @@ -126,6 +195,17 @@ This file contains CLI parameter validators: 5. The user visualizes results using `Visualizer`. 6. The user saves results using `ResultManager`. 
+### Web Interface + +As an alternative to the CLI, users can launch the web interface with `spkmc web`. The flow in the browser is: + +1. The dashboard discovers experiments from the `experiments/` folder and displays them as cards. +2. The user creates a new experiment or selects an existing one. +3. The user configures scenarios (network type, distribution, parameters) through form components. +4. The `runner` module executes simulations as subprocesses, reporting progress in real time. +5. Results appear as interactive Plotly charts on the experiment detail page. +6. The user can compare scenarios, export data, or run AI analysis on outcomes. + ## Optimizations SPKMC uses Numba to optimize critical functions. Optimized functions live in `numba_utils.py` and are decorated with `@njit` or `@njit(parallel=True)` for parallelization. diff --git a/docs/gpu_integration_plan.md b/docs/gpu_integration_plan.md deleted file mode 100644 index f3a33c0..0000000 --- a/docs/gpu_integration_plan.md +++ /dev/null @@ -1,63 +0,0 @@ -# SPKMC GPU Integration Plan - -This document outlines the plan to integrate GPU functionality into the SPKMC project while staying consistent with the existing object-oriented architecture. - -## 1. Overview - -### General Approach - -- Integrate GPU functionality directly into the existing `SPKMC` class -- Add a `use_gpu` parameter to relevant methods -- Make GPU dependencies optional -- Add a global `--gpu` flag in the CLI - -### Files to Modify or Create - -#### New Files -1. `spkmc/utils/gpu_utils.py` - GPU utility functions - -#### Files to Modify -1. `spkmc/core/simulation.py` - Add GPU support to `SPKMC` -2. `spkmc/cli/commands.py` - Add global `--gpu` option -3. `setup.py` - Add GPU dependencies as optional extras -4. `docs/usage.md` - Document GPU acceleration usage - -## 2. 
Detailed Implementation - -### 2.1 GPU Utilities Module - -Create a new file `spkmc/utils/gpu_utils.py` with GPU dependency checks, availability checks, and GPU-accelerated helpers. Key functions: - -- `check_gpu_dependencies()` -- `is_gpu_available()` -- `get_dist_gpu(...)` -- `get_states_gpu(...)` -- `calculate_gpu(...)` - -These should be guarded with conditional imports so the package works without GPU dependencies. - -### 2.2 SPKMC Class Changes - -Update `spkmc/core/simulation.py` to: - -- Accept a `use_gpu` parameter -- Detect GPU availability and auto-select GPU based on problem size -- Fall back to CPU if GPU is unavailable or errors occur - -### 2.3 CLI Changes - -Update `spkmc/cli/commands.py` to add a global `--gpu` flag that enables GPU mode. - -### 2.4 Packaging - -Expose GPU dependencies as extras (e.g., `pip install spkmc[gpu]`). - -### 2.5 Documentation - -Document GPU usage, requirements, and optional dependencies in `docs/usage.md`. - -## 3. Notes - -- GPU acceleration should be optional and never required for core functionality. -- Provide clear user messaging when GPU dependencies are missing or GPU is unavailable. -- Maintain CPU behavior as the default for reliability and compatibility. 
diff --git a/docs/usage.md b/docs/usage.md index 026b58b..a605b95 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -78,6 +78,8 @@ The SPKMC CLI provides the following main commands: - `plot`: Visualize results from previous simulations - `info`: Show information about saved simulations - `compare`: Compare results from multiple simulations +- `batch`: Run multiple simulation scenarios from a JSON file +- `web`: Launch the interactive web interface ### `run` Command @@ -249,6 +251,79 @@ python spkmc_cli.py compare data/spkmc/gamma/ER/results_1000_50_2.0.json data/sp python spkmc_cli.py compare data/spkmc/gamma/ER/results_1000_50_2.0.json data/spkmc/exponential/ER/results_1000_50_.json -o plots/comparison.png ``` +## Web Interface + +SPKMC includes an interactive web interface built with Streamlit that provides a browser-based alternative to the CLI. It supports experiment management, real-time simulation execution, interactive charting, and optional AI-powered analysis. + +### Launching the Web Interface + +```bash +# Start with default settings (opens browser automatically) +spkmc web + +# Specify a custom port +spkmc web --port 8502 + +# Bind to a specific host (useful for remote servers) +spkmc web --host 0.0.0.0 + +# Start without opening a browser window +spkmc web --no-browser + +# Enable verbose debug logging +spkmc web --verbose +``` + +#### Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `--port` | int | `8501` | Port number for the Streamlit server | +| `--host` | string | `localhost` | Host address to bind the server to | +| `--no-browser` | flag | `False` | Do not open a browser window on startup | +| `--verbose` | flag | `False` | Enable structured debug logging to the terminal | + +### Dashboard + +The dashboard is the landing page of the web interface. It provides an overview of all experiments and quick access to create new ones. 
+ +- **Experiment cards**: Each experiment discovered in the `experiments/` folder is shown as a card with its name, number of scenarios, and summary statistics (node counts, network types used). +- **Summary statistics**: Aggregate counts of total experiments, scenarios, and completed runs are displayed at the top of the page. +- **Create experiment**: A modal dialog allows creating a new experiment by specifying a name and adding one or more scenario configurations. The experiment is saved as a `data.json` file in the `experiments/` directory. +- **Live polling**: The dashboard periodically checks the filesystem for new or updated experiments, so results from CLI batch runs appear automatically. + +### Experiment Detail + +Selecting an experiment from the dashboard opens the detail view, which provides full control over that experiment's scenarios and results. + +- **Scenario management**: View all scenarios in the experiment as individual cards showing their network type, distribution, node count, and other parameters. New scenarios can be added directly from this page. +- **SIR charts**: Each completed scenario displays an interactive Plotly chart with Susceptible, Infected, and Recovered curves. Charts support zoom, pan, and hover inspection. +- **Comparison**: A dedicated comparison view overlays SIR curves from multiple scenarios on the same chart, making it straightforward to evaluate the effect of parameter changes. +- **Export**: Simulation results can be exported from the detail page in standard formats for further analysis. + +### Settings + +The settings page provides configuration options that persist across sessions. + +- **Preferences**: Default values for simulation parameters (node count, samples, time steps) and display options. These defaults are applied when creating new scenarios in the web interface. +- **AI configuration**: API key and model selection for optional AI-powered analysis of simulation results. 
Sensitive values are stored via Streamlit secrets. +- **Defaults**: Reset all preferences to their factory values. + +### Experiment Auto-Discovery + +The web interface automatically discovers experiments from the `experiments/` folder in the directory where `spkmc web` is launched (or from the directory named by the `SPKMC_EXPERIMENTS_DIR` environment variable, when set). Any directory containing a `data.json` configuration file is recognized as an experiment. This means experiments created via the CLI `batch` command or by manually placing files in the directory are immediately visible in the dashboard without any import step. + +### Verbose Mode + +When launched with `--verbose`, the web interface outputs structured debug logs to the terminal. This is useful for diagnosing issues with simulation subprocess execution, AI analysis calls, or Streamlit state management. Debug entries are prefixed with the elapsed time since startup and the name of the originating module; messages routed through Python's standard `logging` module (for example, an experiment that was skipped because it failed to load) carry the severity level and logger name instead. + +```bash +# Example: debugging a simulation that fails to start +spkmc web --verbose --no-browser --port 8502 +``` + +The verbose output appears in the terminal where the `spkmc web` command was launched, not in the browser. + ## Programmatic Usage In addition to the CLI, SPKMC can be used programmatically in your own Python scripts. 
diff --git a/spkmc/io/experiments.py b/spkmc/io/experiments.py index fa3aa36..2b0e68f 100644 --- a/spkmc/io/experiments.py +++ b/spkmc/io/experiments.py @@ -9,9 +9,12 @@ """ import json +import logging from pathlib import Path from typing import List, Optional +logger = logging.getLogger(__name__) + # Re-export models for backward compatibility from spkmc.models import ( Experiment, @@ -64,19 +67,39 @@ def list_experiments(self) -> List[Experiment]: experiments: List[Experiment] = [] if not self.experiments_dir.exists(): + logger.debug("Experiments dir does not exist: %s", self.experiments_dir) return experiments + dirs_scanned = 0 + data_found = 0 for exp_dir in sorted(self.experiments_dir.iterdir()): if exp_dir.is_dir(): + dirs_scanned += 1 data_file = exp_dir / self.DATA_FILE_NAME if data_file.exists(): + data_found += 1 try: experiment = self.load_experiment(exp_dir.name) experiments.append(experiment) - except (json.JSONDecodeError, KeyError, ValueError): - # Skip invalid experiments + except Exception as e: + # Log instead of silently skipping, so --verbose + # reveals why an experiment was ignored (e.g. Pydantic + # validation errors). 
+ logger.warning( + "Skipping %s: %s: %s", + exp_dir.name, + type(e).__name__, + e, + ) continue + logger.debug( + "Scanned %d dirs, %d have %s, %d loaded OK", + dirs_scanned, + data_found, + self.DATA_FILE_NAME, + len(experiments), + ) return experiments def load_experiment(self, experiment_name: str) -> Experiment: diff --git a/spkmc/web/app.py b/spkmc/web/app.py index 4bb7735..a61cb31 100644 --- a/spkmc/web/app.py +++ b/spkmc/web/app.py @@ -79,27 +79,33 @@ def render_sidebar() -> None: def main() -> None: """Main application entry point.""" + from spkmc.web.logging import debug + # Apply global styles st.markdown(get_global_styles(), unsafe_allow_html=True) # Initialize session state SessionState.init() + debug("app", "Session initialized") # Load configuration if "config" not in st.session_state: st.session_state.config = WebConfig() + debug("app", f"Config loaded from {WebConfig.CONFIG_FILE}") # Restore running simulations and analyses from disk (survives refresh) if not st.session_state.get("_sims_restored"): - SessionState.restore_running_simulations() - SessionState.restore_running_analyses() + n_sims = SessionState.restore_running_simulations() + n_analyses = SessionState.restore_running_analyses() st.session_state._sims_restored = True + debug("app", f"Restored {n_sims} running simulations, {n_analyses} running analyses") # Render sidebar render_sidebar() # Page routing current_page = SessionState.get_current_page() + debug("app", f"Routing to page: {current_page}") if current_page == "dashboard": from spkmc.web.pages import dashboard diff --git a/spkmc/web/config.py b/spkmc/web/config.py index a4ab783..4bfe181 100644 --- a/spkmc/web/config.py +++ b/spkmc/web/config.py @@ -68,6 +68,8 @@ def __init__(self) -> None: def load(self) -> None: """Load configuration from JSON file, creating with defaults if not found.""" + from spkmc.web.logging import debug + if self.CONFIG_FILE.exists(): try: with open(self.CONFIG_FILE, "r") as f: @@ -83,14 +85,22 @@ def 
load(self) -> None: elif isinstance(default_val, int) and isinstance(merged[key], float): merged[key] = int(merged[key]) self.config = merged + debug("config", f"Loaded config from {self.CONFIG_FILE}") + n_from_disk = len(loaded) + n_defaults = len(self.config) - n_from_disk + debug( + "config", f"{len(self.config)} config keys loaded, {n_defaults} from defaults" + ) except (json.JSONDecodeError, IOError): # If file is corrupted, start with defaults self.config = self.DEFAULTS.copy() + debug("config", "Config file corrupted, using defaults") else: # Create config directory if it doesn't exist self.CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True) self.config = self.DEFAULTS.copy() self.save() + debug("config", f"Created new config at {self.CONFIG_FILE}") def save(self) -> None: """Save current configuration to JSON file.""" @@ -178,5 +188,27 @@ def get_data_path(self) -> Path: return Path(self.get("data_directory", "data")) def get_experiments_path(self) -> Path: - """Get the experiments directory path.""" - return Path(self.get("experiments_directory", "experiments")) + """ + Get the experiments directory path. + + Resolution order: + 1. ``SPKMC_EXPERIMENTS_DIR`` environment variable (set by CLI launcher) + 2. ``experiments_directory`` key in the config file + 3. Default ``"experiments"`` + + The path is always resolved to an absolute path so it works + regardless of Streamlit's working directory. 
+ """ + from spkmc.web.logging import debug + + env_val = os.environ.get("SPKMC_EXPERIMENTS_DIR") + if env_val: + path = Path(env_val) + debug("config", f"Experiments path: {path} (from env)") + else: + raw = self.get("experiments_directory", "experiments") + path = Path(raw) + if not path.is_absolute(): + path = path.resolve() + debug("config", f"Experiments path: {path} (from config)") + return path diff --git a/spkmc/web/logging.py b/spkmc/web/logging.py new file mode 100644 index 0000000..5cdc11d --- /dev/null +++ b/spkmc/web/logging.py @@ -0,0 +1,75 @@ +""" +Structured logging for the SPKMC web interface. + +Provides color-coded, timestamped debug output that appears in the terminal +where ``spkmc web --verbose`` is running. Debug messages are gated behind the +``SPKMC_VERBOSE`` environment variable so they add zero overhead in normal use. +""" + +from __future__ import annotations + +import os +import sys +import time + +_verbose: bool = os.environ.get("SPKMC_VERBOSE", "0") == "1" +_start_time: float = time.monotonic() + +# ANSI colour codes (stderr is almost always a terminal when running locally) +_RESET = "\033[0m" +_DIM = "\033[2m" +_YELLOW = "\033[33m" +_CYAN = "\033[36m" +_RED = "\033[31m" + +# When verbose mode is active, also enable Python's standard logging so +# that logger.warning()/debug() calls in non-web modules (e.g. +# spkmc.io.experiments) are visible in the terminal. 
+if _verbose: + import logging as _logging + + _handler = _logging.StreamHandler(sys.stderr) + _handler.setFormatter(_logging.Formatter("%(levelname)s [%(name)s] %(message)s")) + _spkmc_logger = _logging.getLogger("spkmc") + _spkmc_logger.setLevel(_logging.DEBUG) + _spkmc_logger.addHandler(_handler) + + +def is_verbose() -> bool: + """Return whether verbose/debug logging is enabled.""" + return _verbose + + +def _elapsed() -> str: + """Return a human-readable elapsed-time string since process start.""" + return f"{time.monotonic() - _start_time:.3f}s" + + +def debug(module: str, message: str) -> None: + """Print a debug message to stderr (only when verbose mode is on).""" + if _verbose: + print( + f"{_DIM}[{_elapsed()}]{_RESET} {_CYAN}[{module}]{_RESET} {message}", + file=sys.stderr, + ) + + +def info(module: str, message: str) -> None: + """Print an info-level message to stderr (always visible).""" + print(f"[{module}] {message}", file=sys.stderr) + + +def warn(module: str, message: str) -> None: + """Print a warning message to stderr (always visible).""" + print( + f"{_YELLOW}[{module}] WARNING: {message}{_RESET}", + file=sys.stderr, + ) + + +def error(module: str, message: str) -> None: + """Print an error message to stderr (always visible).""" + print( + f"{_RED}[{module}] ERROR: {message}{_RESET}", + file=sys.stderr, + ) diff --git a/spkmc/web/pages/dashboard.py b/spkmc/web/pages/dashboard.py index c54c05b..5cd7d4a 100644 --- a/spkmc/web/pages/dashboard.py +++ b/spkmc/web/pages/dashboard.py @@ -34,6 +34,8 @@ def render() -> None: """Render the dashboard page.""" + from spkmc.web.logging import debug + # Page header st.markdown( page_header("Experiments", subtitle="Manage and run SPKMC epidemic simulation experiments"), @@ -42,9 +44,24 @@ def render() -> None: # Load experiments config = st.session_state.config - exp_manager = ExperimentManager(str(config.get_experiments_path())) + exp_path = config.get_experiments_path() + debug("dashboard", f"Scanning 
experiments dir: {exp_path}") + exp_manager = ExperimentManager(str(exp_path)) experiments = exp_manager.list_experiments() + total_scenarios = sum(len(exp.scenarios) for exp in experiments) + completed = sum( + 1 + for exp in experiments + if exp.path is not None + for sc in exp.scenarios + if (exp.path / f"{sc.normalized_label}.json").exists() + ) + debug( + "dashboard", + f"Found {len(experiments)} experiments ({total_scenarios} scenarios, {completed} completed)", + ) + # Summary stats row with beautiful cards render_summary_stats(experiments) diff --git a/spkmc/web/runner.py b/spkmc/web/runner.py index 87cc346..71ca54c 100644 --- a/spkmc/web/runner.py +++ b/spkmc/web/runner.py @@ -76,6 +76,18 @@ def run_scenario( # Launch subprocess try: + from spkmc.web.logging import debug + + debug( + "runner", + f"Starting simulation: {experiment.path.name}/{scenario.label}", + ) + debug( + "runner", + f"Parameters: nodes={scenario.nodes}, samples={scenario.total_samples()}, " + f"dist={scenario.distribution}", + ) + process = subprocess.Popen( [sys.executable, str(script_file)], stdout=subprocess.PIPE, @@ -91,6 +103,8 @@ def run_scenario( with open(status_file, "w") as f: json.dump(status_data, f) + debug("runner", f"PID: {process.pid}, status: {status_file}") + if show_progress: st.toast(f"Started: {scenario.label}") diff --git a/spkmc/web/state.py b/spkmc/web/state.py index 4464ad4..0cf9527 100644 --- a/spkmc/web/state.py +++ b/spkmc/web/state.py @@ -255,16 +255,20 @@ def get_analysis_status(analysis_id: str) -> str: return "pending" @staticmethod - def restore_running_analyses() -> None: + def restore_running_analyses() -> int: """Restore running analyses from status files on disk. Scans .spkmc_web/status/ for analysis status files, verifies the PID is still alive, and adds them back to session state. Called once on session init to survive page refresh. + + Returns: + Number of analyses restored. 
""" + restored = 0 status_dir = Path(".spkmc_web") / "status" if not status_dir.exists(): - return + return 0 for status_file in sorted( list(status_dir.glob("exp_analysis--*.json")) @@ -345,18 +349,25 @@ def restore_running_analyses() -> None: "pid": pid, } SessionState.add_running_analysis(analysis_id, info) + restored += 1 + + return restored @staticmethod - def restore_running_simulations() -> None: + def restore_running_simulations() -> int: """Restore running simulations from status files on disk. Scans .spkmc_web/status/ for status files with running processes, verifies the PID is still alive, and adds them back to session state. Called once on session init to survive page refresh. + + Returns: + Number of simulations restored. """ + restored = 0 status_dir = Path(".spkmc_web") / "status" if not status_dir.exists(): - return + return 0 for status_file in status_dir.glob("sim--*.json"): try: @@ -420,7 +431,11 @@ def restore_running_simulations() -> None: if total > 0: SessionState.set_simulation_progress(scenario_id, progress, total) + restored += 1 + + return restored + def _is_pid_alive(pid: int) -> bool: """Check if a process with the given PID is still running.""" - return psutil.pid_exists(pid) + return bool(psutil.pid_exists(pid)) diff --git a/tests/test_web/test_config.py b/tests/test_web/test_config.py index 38f716a..3635dc6 100644 --- a/tests/test_web/test_config.py +++ b/tests/test_web/test_config.py @@ -175,7 +175,20 @@ def test_get_experiments_path_returns_path_instance(tmp_path): def test_get_experiments_path_reflects_configured_value(tmp_path): cfg = _make_config(tmp_path) cfg.set("experiments_directory", "my_experiments") - assert cfg.get_experiments_path() == Path("my_experiments") + result = cfg.get_experiments_path() + # Relative paths are now resolved to absolute + assert result.is_absolute() + assert result.name == "my_experiments" + + +def test_get_experiments_path_prefers_env_var(tmp_path, monkeypatch): + """SPKMC_EXPERIMENTS_DIR env var 
overrides config file.""" + cfg = _make_config(tmp_path) + cfg.set("experiments_directory", "should_be_ignored") + env_path = str(tmp_path / "from_env") + monkeypatch.setenv("SPKMC_EXPERIMENTS_DIR", env_path) + result = cfg.get_experiments_path() + assert result == Path(env_path) # ── OpenAI secrets ──────────────────────────────────────────────────────────── From c56cec5cd2e26848f0cbfc22d631464a522412a9 Mon Sep 17 00:00:00 2001 From: Marcus Castro Date: Tue, 24 Feb 2026 11:49:44 -0300 Subject: [PATCH 3/3] feat(cli): add experiments --doctor with resilient legacy key normalization Add --doctor flag to scan and fix legacy data.json files in-place. Extend ExperimentConfig.from_dict() to normalize both legacy keys (network_type, network_size, N, time_max, time_points) and legacy network type values (erdos_renyi, cn, scale_free, complete_graph, etc.) to current short codes (er, sf, cg, rrn). Case-insensitive. Doctor validates fixed data before writing and is idempotent. --- spkmc/cli/commands.py | 202 +++++++++++++++++++++- spkmc/models/experiment.py | 31 +++- tests/test_doctor.py | 332 +++++++++++++++++++++++++++++++++++++ 3 files changed, 557 insertions(+), 8 deletions(-) create mode 100644 tests/test_doctor.py diff --git a/spkmc/cli/commands.py b/spkmc/cli/commands.py index ee5410e..6dfa051 100644 --- a/spkmc/cli/commands.py +++ b/spkmc/cli/commands.py @@ -2260,6 +2260,129 @@ def _analyze_all_experiments( console.print(f" {format_param('Failed', failed)}") +# ── Legacy key mapping used by --doctor and ExperimentConfig.from_dict ── +LEGACY_KEY_MAPPING: Dict[str, str] = { + "network_type": "network", + "network_size": "nodes", + "N": "nodes", + "time_max": "t_max", + "time_points": "steps", +} + + +def _find_legacy_issues(d: Dict[str, Any]) -> List[str]: + """Return descriptions of legacy keys/values found in *d*.""" + from spkmc.models.experiment import NETWORK_VALUE_MAPPING + + found: List[str] = [] + for old_key, new_key in LEGACY_KEY_MAPPING.items(): + if old_key 
in d: + found.append(f"{old_key} -> {new_key}") + net_val = d.get("network_type") or d.get("network") + if isinstance(net_val, str) and net_val.lower() in NETWORK_VALUE_MAPPING: + found.append(f"{net_val} -> {NETWORK_VALUE_MAPPING[net_val.lower()]}") + return found + + +def _apply_legacy_fixes(d: Dict[str, Any]) -> None: + """Apply key renames and value normalization to *d* in-place.""" + from spkmc.models.experiment import NETWORK_VALUE_MAPPING + + for old_key, new_key in LEGACY_KEY_MAPPING.items(): + if old_key in d: + d[new_key] = d.pop(old_key) + if "network" in d and isinstance(d["network"], str): + normalized = NETWORK_VALUE_MAPPING.get(d["network"].lower()) + if normalized: + d["network"] = normalized + + +def _run_doctor(experiments_dir: str = "experiments") -> None: + """ + Scan and fix legacy key names and values in experiment data.json files. + + Rewrites data.json in-place when legacy keys or values are found, after + validating that the fixed data loads successfully. + """ + from spkmc.models.experiment import Experiment, ExperimentConfig + + exp_path = Path(experiments_dir) + if not exp_path.exists(): + log_error(f"Experiments directory not found: {exp_path.resolve()}") + return + + log_info(f"Scanning experiments directory: {exp_path.resolve()}") + console.print() + + scanned = 0 + fixed = 0 + already_ok = 0 + failed = 0 + + for data_file in sorted(exp_path.glob("*/data.json")): + exp_name = data_file.parent.name + scanned += 1 + + try: + with open(data_file, "r", encoding="utf-8") as f: + raw = json.load(f) + except (json.JSONDecodeError, OSError) as exc: + console.print(f" {exp_name} ... 
[red]error[/red] ({exc})") + failed += 1 + continue + + # Collect issues (legacy keys and values) + issues: List[str] = _find_legacy_issues(raw.get("parameters", {})) + + # Check each scenario + scenario_count = 0 + for scenario in raw.get("scenarios", []): + if not scenario.get("_comment"): + scenario_count += 1 + issues.extend(_find_legacy_issues(scenario)) + + if not issues: + console.print(f" {exp_name} ... [green]OK[/green] (no issues)") + already_ok += 1 + continue + + # Apply fixes to parameters + _apply_legacy_fixes(raw.get("parameters", {})) + + # Apply fixes to scenarios + for scenario in raw.get("scenarios", []): + _apply_legacy_fixes(scenario) + + # Validate the fixed data actually loads + try: + config = ExperimentConfig.from_dict(raw) + Experiment.from_config(config) + except Exception as exc: + console.print( + f" {exp_name} ... [red]failed[/red] " f"(validation error after fix: {exc})" + ) + failed += 1 + continue + + # Write back + with open(data_file, "w", encoding="utf-8") as f: + json.dump(raw, f, indent=2) + f.write("\n") + + unique_issues = sorted(set(issues)) + console.print( + f" {exp_name} ... 
[yellow]fixed[/yellow] " + f"({', '.join(unique_issues)}) [{scenario_count} scenarios]" + ) + fixed += 1 + + console.print() + log_info( + f"Summary: {scanned} experiments scanned, " + f"{fixed} fixed, {already_ok} already OK" + (f", {failed} failed" if failed else "") + ) + + @cli.command(name="experiments", help="Run or create experiments from the experiments directory") @click.argument("scenarios_file", type=str, default=None, required=False) @click.option( @@ -2298,6 +2421,12 @@ def _analyze_all_experiments( default=False, help="Run AI analysis after execution (requires OPENAI_API_KEY)", ) +@click.option( + "--doctor", + is_flag=True, + default=False, + help="Scan and fix legacy key names in data.json files", +) @click.pass_context def experiment( ctx: click.Context, @@ -2309,6 +2438,7 @@ def experiment( debug: bool, clear_cache: bool, analyze: bool, + doctor: bool, ) -> None: """ Run or create experiments from the experiments directory. @@ -2321,6 +2451,11 @@ def experiment( parameters for the simulation. Each scenario will run sequentially and results will be saved to separate files in the specified directory. 
""" + # --doctor: scan and fix legacy data.json files, then return early + if doctor: + _run_doctor() + return + # Get CLI context for global options from spkmc.cli.display import display_execution_summary from spkmc.cli.utils import get_cli_context @@ -2800,24 +2935,60 @@ def clean( @click.option("--port", "-p", default=8501, type=int, help="Port to run the server on") @click.option("--host", default="localhost", type=str, help="Host to bind to") @click.option("--no-browser", is_flag=True, help="Do not open browser automatically") -def web(port: int, host: str, no_browser: bool) -> None: +@click.option("--verbose", "-v", is_flag=True, help="Show detailed debug logging in the terminal") +def web(port: int, host: str, no_browser: bool, verbose: bool) -> None: """Launch the Streamlit web interface.""" + import os import subprocess import sys + import threading + import webbrowser from pathlib import Path log_info("Starting SPKMC web interface...") - # Find the app.py file + # ── Verbose / debug setup ───────────────────────── + if verbose: + os.environ["SPKMC_VERBOSE"] = "1" + + def _dbg(msg: str) -> None: + if verbose: + print(f" [launcher] {msg}", file=sys.stderr) + + _dbg(f"CWD: {Path.cwd()}") + + # ── Resolve experiments directory ───────────────── + # Make the path absolute so Streamlit (which may change CWD) can find it. 
+ experiments_dir = Path.cwd() / "experiments" + env_exp = os.environ.get("SPKMC_EXPERIMENTS_DIR") + if env_exp: + experiments_dir = Path(env_exp).resolve() + else: + experiments_dir = experiments_dir.resolve() + os.environ["SPKMC_EXPERIMENTS_DIR"] = str(experiments_dir) + + if experiments_dir.is_dir(): + n_exp = sum(1 for p in experiments_dir.iterdir() if p.is_dir()) + _dbg(f"Experiments dir: {experiments_dir} (found, {n_exp} experiments)") + else: + _dbg(f"Experiments dir: {experiments_dir} (not found, will be created on demand)") + + # ── Locate web app ──────────────────────────────── web_app = Path(__file__).parent.parent / "web" / "app.py" + _dbg(f"Web app: {web_app}") if not web_app.exists(): log_error(f"Web app not found at {web_app}") log_error("Web interface files are missing. Reinstall SPKMC: pip install --upgrade spkmc") sys.exit(1) - # Build streamlit command with all config as CLI flags - # (avoids requiring a .streamlit/config.toml file on disk) + # ── Config file status ──────────────────────────── + config_file = Path.home() / ".spkmc" / "web_config.json" + _dbg(f"Config file: {config_file} ({'exists' if config_file.exists() else 'not found'})") + + # ── Build streamlit command ─────────────────────── + # Always headless=true to suppress the first-run email prompt. + # Browser auto-open is handled separately via webbrowser.open(). 
cmd = [ sys.executable, "-m", @@ -2829,7 +3000,7 @@ def web(port: int, host: str, no_browser: bool) -> None: "--server.address", host, "--server.headless", - "true" if no_browser else "false", + "true", "--server.fileWatcherType", "none", "--browser.gatherUsageStats", @@ -2852,7 +3023,26 @@ def web(port: int, host: str, no_browser: bool) -> None: "sans serif", ] - log_info(f"Launching at http://{host}:{port}") + _dbg(f"Streamlit command: {' '.join(cmd)}") + _dbg(f"Browser auto-open: {'no' if no_browser else 'yes'}") + + url = f"http://{host}:{port}" + log_info(f"Launching at {url}") + + # ── Auto-open browser in a background thread ────── + if not no_browser: + + def _open_browser() -> None: + import time + + time.sleep(2) # give Streamlit time to bind the port + _dbg(f"Opening browser at {url}") + webbrowser.open(url) + + t = threading.Thread(target=_open_browser, daemon=True) + t.start() + + _dbg("Starting Streamlit server...") try: result = subprocess.run(cmd) diff --git a/spkmc/models/experiment.py b/spkmc/models/experiment.py index 91c2122..b231e30 100644 --- a/spkmc/models/experiment.py +++ b/spkmc/models/experiment.py @@ -14,6 +14,22 @@ from spkmc.models.config import PlotConfig from spkmc.models.scenario import Scenario, ScenarioOverride +# Mapping of legacy network type values (full names) to current short codes. +# Keys are lowercase; lookup should lowercase the input first. 
+NETWORK_VALUE_MAPPING: Dict[str, str] = { + "erdos_renyi": "er", + "erdos-renyi": "er", + "cn": "sf", + "complex_network": "sf", + "complex-network": "sf", + "scale_free": "sf", + "scale-free": "sf", + "complete_graph": "cg", + "complete": "cg", + "random_regular": "rrn", + "random-regular": "rrn", +} + class ExperimentConfig(BaseModel): """ @@ -42,17 +58,28 @@ def from_dict(cls, data: Dict[str, Any]) -> "ExperimentConfig": Returns: ExperimentConfig instance """ - # Key mapping for parameter normalization + # Key mapping for parameter normalization (includes legacy keys) key_mapping = { + "network_type": "network", + "network_size": "nodes", + "N": "nodes", "time_max": "t_max", "time_points": "steps", } + # Value mapping for fields whose values changed across versions + value_mapping: Dict[str, Dict[str, str]] = { + "network": NETWORK_VALUE_MAPPING, + } + def normalize_params(params: Dict[str, Any]) -> Dict[str, Any]: - """Normalize parameter keys to internal format.""" + """Normalize parameter keys and values to internal format.""" normalized = {} for key, value in params.items(): normalized_key = key_mapping.get(key, key) + if normalized_key in value_mapping and isinstance(value, str): + lowered = value.lower() + value = value_mapping[normalized_key].get(lowered, lowered) normalized[normalized_key] = value return normalized diff --git a/tests/test_doctor.py b/tests/test_doctor.py new file mode 100644 index 0000000..d20739e --- /dev/null +++ b/tests/test_doctor.py @@ -0,0 +1,332 @@ +""" +Tests for the --doctor flag and legacy key normalization. 
+ +Covers: +- ExperimentConfig.from_dict() normalizing legacy keys +- _run_doctor() rewriting data.json files +- Idempotency (running doctor twice produces no changes) +- No-op when data.json has no legacy keys +""" + +import json +import tempfile +from pathlib import Path +from typing import Any, Dict + +import pytest + +from spkmc.cli.commands import LEGACY_KEY_MAPPING, _run_doctor +from spkmc.models.experiment import Experiment, ExperimentConfig + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +def _base_params() -> Dict[str, Any]: + """Return a complete set of valid experiment parameters (current schema).""" + return { + "network": "er", + "distribution": "gamma", + "nodes": 1000, + "samples": 50, + "k_avg": 10, + "shape": 2.0, + "scale": 0.5, + "lambda": 0.5, + "t_max": 10, + "steps": 100, + "initial_perc": 0.01, + } + + +def _legacy_params() -> Dict[str, Any]: + """Return parameters using ALL legacy key names.""" + return { + "network_type": "er", + "distribution": "gamma", + "N": 1000, + "samples": 50, + "k_avg": 10, + "shape": 2.0, + "scale": 0.5, + "lambda": 0.5, + "time_max": 10, + "time_points": 100, + "initial_perc": 0.01, + } + + +def _write_experiment( + base_dir: Path, + name: str, + parameters: Dict[str, Any], + scenarios: Any = None, +) -> Path: + """Write a data.json file inside base_dir/name/.""" + exp_dir = base_dir / name + exp_dir.mkdir(parents=True, exist_ok=True) + data = { + "name": name, + "parameters": parameters, + "scenarios": scenarios or [{"label": "baseline"}], + } + data_file = exp_dir / "data.json" + data_file.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8") + return data_file + + +# --------------------------------------------------------------------------- +# ExperimentConfig.from_dict() normalization tests +# --------------------------------------------------------------------------- + + 
+class TestKeyNormalization: + """Test that from_dict() normalizes legacy keys correctly.""" + + def test_network_type_to_network(self): + params = _base_params() + params["network_type"] = params.pop("network") + data = {"name": "test", "parameters": params, "scenarios": [{"label": "s1"}]} + config = ExperimentConfig.from_dict(data) + assert "network" in config.parameters + assert "network_type" not in config.parameters + + def test_network_size_to_nodes(self): + params = _base_params() + params["network_size"] = params.pop("nodes") + data = {"name": "test", "parameters": params, "scenarios": [{"label": "s1"}]} + config = ExperimentConfig.from_dict(data) + assert config.parameters["nodes"] == 1000 + assert "network_size" not in config.parameters + + def test_N_to_nodes(self): + params = _base_params() + params["N"] = params.pop("nodes") + data = {"name": "test", "parameters": params, "scenarios": [{"label": "s1"}]} + config = ExperimentConfig.from_dict(data) + assert config.parameters["nodes"] == 1000 + assert "N" not in config.parameters + + def test_time_max_to_t_max(self): + params = _base_params() + params["time_max"] = params.pop("t_max") + data = {"name": "test", "parameters": params, "scenarios": [{"label": "s1"}]} + config = ExperimentConfig.from_dict(data) + assert config.parameters["t_max"] == 10 + assert "time_max" not in config.parameters + + def test_time_points_to_steps(self): + params = _base_params() + params["time_points"] = params.pop("steps") + data = {"name": "test", "parameters": params, "scenarios": [{"label": "s1"}]} + config = ExperimentConfig.from_dict(data) + assert config.parameters["steps"] == 100 + assert "time_points" not in config.parameters + + def test_all_legacy_keys_at_once(self): + data = { + "name": "legacy_all", + "parameters": _legacy_params(), + "scenarios": [{"label": "s1"}], + } + config = ExperimentConfig.from_dict(data) + experiment = Experiment.from_config(config) + assert len(experiment.scenarios) == 1 + assert 
experiment.scenarios[0].network == "er" + assert experiment.scenarios[0].nodes == 1000 + assert experiment.scenarios[0].t_max == 10 + assert experiment.scenarios[0].steps == 100 + + def test_legacy_keys_in_scenario_override(self): + params = _base_params() + scenarios = [ + {"label": "override", "network_type": "sf", "N": 2000, "exponent": 2.5}, + ] + data = {"name": "test", "parameters": params, "scenarios": scenarios} + config = ExperimentConfig.from_dict(data) + experiment = Experiment.from_config(config) + assert experiment.scenarios[0].network == "sf" + assert experiment.scenarios[0].nodes == 2000 + + @pytest.mark.parametrize( + "legacy_value,expected", + [ + ("erdos_renyi", "er"), + ("erdos-renyi", "er"), + ("Erdos-Renyi", "er"), + ("ER", "er"), + ("cn", "sf"), + ("CN", "sf"), + ("complex_network", "sf"), + ("complex-network", "sf"), + ("scale_free", "sf"), + ("scale-free", "sf"), + ("Scale-Free", "sf"), + ("SF", "sf"), + ("complete_graph", "cg"), + ("complete", "cg"), + ("CG", "cg"), + ("random_regular", "rrn"), + ("random-regular", "rrn"), + ("RRN", "rrn"), + ], + ) + def test_legacy_network_values_in_parameters(self, legacy_value, expected): + """Legacy network type values (full names) are normalized to short codes.""" + params = _base_params() + params["network"] = legacy_value + data = {"name": "test", "parameters": params, "scenarios": [{"label": "s1"}]} + config = ExperimentConfig.from_dict(data) + assert config.parameters["network"] == expected + + def test_legacy_network_values_in_scenarios(self): + """Legacy network type values in scenario overrides are normalized.""" + params = _base_params() + scenarios = [ + {"label": "er_full", "network_type": "erdos_renyi"}, + {"label": "sf_full", "network_type": "scale_free", "exponent": 2.5}, + {"label": "cg_full", "network_type": "complete_graph"}, + ] + data = {"name": "test", "parameters": params, "scenarios": scenarios} + config = ExperimentConfig.from_dict(data) + experiment = 
Experiment.from_config(config) + assert experiment.scenarios[0].network == "er" + assert experiment.scenarios[1].network == "sf" + assert experiment.scenarios[2].network == "cg" + + +# --------------------------------------------------------------------------- +# _run_doctor() tests +# --------------------------------------------------------------------------- + + +class TestDoctor: + """Test the _run_doctor() function.""" + + def test_fixes_legacy_keys(self): + """Doctor rewrites data.json with legacy keys to current schema.""" + with tempfile.TemporaryDirectory() as tmpdir: + data_file = _write_experiment(Path(tmpdir), "legacy_exp", _legacy_params()) + + _run_doctor(experiments_dir=tmpdir) + + fixed = json.loads(data_file.read_text(encoding="utf-8")) + params = fixed["parameters"] + assert "network" in params + assert "nodes" in params + assert "t_max" in params + assert "steps" in params + # Legacy keys must be gone + assert "network_type" not in params + assert "N" not in params + assert "time_max" not in params + assert "time_points" not in params + + def test_fixes_legacy_keys_in_scenarios(self): + """Doctor fixes legacy keys inside individual scenarios too.""" + with tempfile.TemporaryDirectory() as tmpdir: + params = _base_params() + scenarios = [ + {"label": "s1", "network_type": "sf", "N": 2000, "exponent": 2.5}, + {"label": "s2", "network_type": "rrn"}, + ] + data_file = _write_experiment( + Path(tmpdir), "scenario_legacy", params, scenarios=scenarios + ) + + _run_doctor(experiments_dir=tmpdir) + + fixed = json.loads(data_file.read_text(encoding="utf-8")) + for s in fixed["scenarios"]: + assert "network_type" not in s + assert "N" not in s + + def test_fixes_legacy_network_values(self): + """Doctor normalizes full network type names to short codes.""" + with tempfile.TemporaryDirectory() as tmpdir: + params = _base_params() + params["network_type"] = "erdos_renyi" + scenarios = [ + {"label": "s1"}, + {"label": "sf_scenario", "network_type": "scale_free", 
"exponent": 2.5}, + {"label": "cg_scenario", "network_type": "complete_graph"}, + ] + data_file = _write_experiment(Path(tmpdir), "value_legacy", params, scenarios=scenarios) + + _run_doctor(experiments_dir=tmpdir) + + fixed = json.loads(data_file.read_text(encoding="utf-8")) + assert fixed["parameters"]["network"] == "er" + assert fixed["scenarios"][1]["network"] == "sf" + assert fixed["scenarios"][2]["network"] == "cg" + + def test_idempotent(self): + """Running doctor twice should not change anything the second time.""" + with tempfile.TemporaryDirectory() as tmpdir: + data_file = _write_experiment(Path(tmpdir), "idem_exp", _legacy_params()) + + # First run: should fix + _run_doctor(experiments_dir=tmpdir) + content_after_first = data_file.read_text(encoding="utf-8") + + # Second run: should be a no-op + _run_doctor(experiments_dir=tmpdir) + content_after_second = data_file.read_text(encoding="utf-8") + + assert content_after_first == content_after_second + + def test_no_op_for_current_schema(self): + """Doctor leaves data.json untouched if it uses current schema.""" + with tempfile.TemporaryDirectory() as tmpdir: + data_file = _write_experiment(Path(tmpdir), "current_exp", _base_params()) + original = data_file.read_text(encoding="utf-8") + + _run_doctor(experiments_dir=tmpdir) + + assert data_file.read_text(encoding="utf-8") == original + + def test_handles_missing_directory(self, capsys): + """Doctor reports error gracefully for missing experiments dir.""" + _run_doctor(experiments_dir="/nonexistent/path") + # Should not raise - just print an error message + + def test_handles_invalid_json(self): + """Doctor skips files with invalid JSON.""" + with tempfile.TemporaryDirectory() as tmpdir: + exp_dir = Path(tmpdir) / "broken_exp" + exp_dir.mkdir() + (exp_dir / "data.json").write_text("{ not valid json }") + + # Should not raise + _run_doctor(experiments_dir=tmpdir) + + def test_preserves_extra_fields(self): + """Doctor preserves fields it doesn't know about.""" 
+ with tempfile.TemporaryDirectory() as tmpdir: + params = _legacy_params() + params["custom_field"] = "keep_me" + data_file = _write_experiment(Path(tmpdir), "extra_fields", params) + + _run_doctor(experiments_dir=tmpdir) + + fixed = json.loads(data_file.read_text(encoding="utf-8")) + assert fixed["parameters"]["custom_field"] == "keep_me" + + def test_multiple_experiments(self): + """Doctor processes multiple experiments in one run.""" + with tempfile.TemporaryDirectory() as tmpdir: + base = Path(tmpdir) + _write_experiment(base, "exp_legacy", _legacy_params()) + _write_experiment(base, "exp_current", _base_params()) + + _run_doctor(experiments_dir=tmpdir) + + # Legacy should be fixed + legacy = json.loads((base / "exp_legacy" / "data.json").read_text(encoding="utf-8")) + assert "network" in legacy["parameters"] + assert "N" not in legacy["parameters"] + + # Current should be unchanged + current = json.loads((base / "exp_current" / "data.json").read_text(encoding="utf-8")) + assert "network" in current["parameters"]