From e9bae0908f71f6ea9082e0fafc6948c6aefe42f6 Mon Sep 17 00:00:00 2001 From: a2z Date: Thu, 19 Mar 2026 10:49:09 +0300 Subject: [PATCH 01/10] docs: add Phase D (RL integration) design spec Covers SurfaceDiscoveryEnv, PPO/SAC trainer, benchmark suite (17 controllers x 5 plants), and visualization module. Co-Authored-By: Claude Opus 4.6 (1M context) --- ...026-03-19-phase-d-rl-integration-design.md | 320 ++++++++++++++++++ 1 file changed, 320 insertions(+) create mode 100644 docs/superpowers/specs/2026-03-19-phase-d-rl-integration-design.md diff --git a/docs/superpowers/specs/2026-03-19-phase-d-rl-integration-design.md b/docs/superpowers/specs/2026-03-19-phase-d-rl-integration-design.md new file mode 100644 index 0000000..b8c7249 --- /dev/null +++ b/docs/superpowers/specs/2026-03-19-phase-d-rl-integration-design.md @@ -0,0 +1,320 @@ +# OpenSMC Phase D: RL Integration — Design Spec + +**Date:** 2026-03-19 +**Status:** Approved +**Scope:** Port autosmc RL infrastructure into `opensmc` Python package + full benchmark suite + +## Decisions + +| Decision | Choice | Rationale | +|----------|--------|-----------| +| Goal | Port infrastructure + run benchmark | Both needed for Paper 3 | +| Plants | 5 existing Gym envs | DoubleInt, Crane, Quadrotor, InvPendulum, PMSM — strong cross-section | +| RL formulation | Surface discovery (agent outputs sigma) | Core novelty; enables fingerprinting | +| Benchmark tables | Fixed law + best-matched controller | Two stories: surface quality + practical performance | +| RL algorithms | PPO + SAC | On-policy + off-policy proves algorithm-independence | +| Training mode | Independent per plant | Fingerprinting answers cross-plant question without curriculum | + +## Module Structure + +``` +opensmc/rl/ +├── __init__.py # public API (expanded) +├── rl_surface.py # EXISTS — fix: SAC loading + 4D obs padding +├── fingerprinting.py # EXISTS — no changes +├── discovery_env.py # NEW — SurfaceDiscoveryEnv (agent outputs sigma) +├── trainer.py # NEW 
— PPO/SAC training with VecNormalize +├── benchmark.py # NEW — RL vs 17 controllers x 5 plants +└── visualize.py # NEW — heatmaps, radar, contours, bar charts +``` + +## Component 1: SurfaceDiscoveryEnv + +Gymnasium environment where the RL agent discovers sliding surfaces. Ported from `autosmc/envs/discovery_env.py`, adapted to wrap OpenSMC `Plant` classes. + +**Interface:** +```python +env = SurfaceDiscoveryEnv( + plant="double_integrator", # string or Plant instance + disturbance="sinusoidal", # none | constant | sinusoidal | step + disturbance_amplitude=1.0, + dt=0.01, + max_steps=500, # 5 seconds + sigma_max=20.0, + control_gains={"K": 5, "lam": 3, "phi": 0.05}, +) +``` + +**String-to-plant mapping:** When `plant` is a string, the env resolves it via: +```python +PLANT_REGISTRY = { + "double_integrator": DoubleIntegrator, + "inverted_pendulum": InvertedPendulum, + "crane": Crane, + "quadrotor": Quadrotor, + "pmsm": PMSM, +} +``` +When `plant` is a `Plant` instance, it is used directly. The env calls `plant.dynamics(t, x, u, d)` for RK4 integration, where `d` is the disturbance input. + +**Spaces:** +- Observation: `Box(4)` — `[e, edot, sigma_prev, |u_prev|]` + - low: `[-10, -10, -50, 0]`, high: `[10, 10, 50, 100]` +- Action: `Box(1)` — `[-1, 1]` scaled to `[-sigma_max, sigma_max]` + +**Control law:** `u = -K * sat(sigma / phi) - lam * sigma` + +This is a self-contained control law internal to the env, NOT composed with an OpenSMC `Controller` instance. This is intentional: the env must be a standard Gymnasium environment with a simple `step(action) -> obs` interface. The RL agent discovers the surface; the fixed control law converts it to a control input. This matches the autosmc formulation and ensures the training reward reflects surface quality, not controller design. 
+ +**Reward:** +``` +r = -1.0 * e^2 - 0.01 * u^2 - 0.005 * du^2 + + 5.0 * dt (when |e| < 0.02 and |edot| < 0.05) + - 50 (on truncation: |e| > 10 or |edot| > 20) +``` + +**Plant-to-error mapping:** + +| Plant | e | edot | How | +|-------|---|------|-----| +| DoubleIntegrator | `x[0] - ref` | `x[1]` | direct state | +| InvertedPendulum | `x[2]` (theta) | `x[3]` (theta_dot) | direct state | +| Crane | `x[0] - ref` (trolley) | `x[1]` (trolley_dot) | direct state | +| Quadrotor | `x[2] - ref` (z) | `x[8]` (vz) | direct state | +| PMSM | `x[2] - ref` (omega) | `(1.5*pp*psi_f*x[1] - B*x[2] - TL) / J` | computed from dynamics | + +PMSM `edot` is computed from the motor dynamics: `domega/dt = (Te - B*omega - TL) / J` where `Te = 1.5 * pp * psi_f * i_q` (non-salient pole simplification). The env accesses `i_q = x[1]` from the PMSM state vector `[i_d, i_q, omega, theta]`. Load torque `TL` is separate from the disturbance `d` (TL is constant mechanical load, d is added to the dynamics as external perturbation). + +**Integration:** RK4 with 4 substeps per dt. + +**Registration:** `OpenSMC/SurfaceDiscovery-v0` with plant passed via `env_kwargs`. + +## Component 2: Trainer + +Wraps stable-baselines3 PPO/SAC with OpenSMC-validated defaults. 
+ +**Interface:** +```python +result = train_surface( + plant="double_integrator", + algorithm="PPO", # "PPO" | "SAC" + disturbance="sinusoidal", + total_timesteps=500_000, + n_envs=4, + net_arch=[64, 64], + seed=42, + output_dir="trained_models/", + eval_freq=10_000, + save_best=True, + normalize_obs=True, + normalize_reward=True, + clip_obs=10.0, + env_kwargs=None, +) +``` + +**Algorithm defaults:** + +| Parameter | PPO | SAC | +|-----------|-----|-----| +| learning_rate | 3e-4 | 3e-4 | +| n_steps / buffer_size | 512 | 100_000 | +| batch_size | 64 | 256 | +| n_epochs / train_freq | 10 | 1 | +| gamma | 0.99 | 0.99 | +| gae_lambda / tau | 0.95 | 0.005 | +| clip_range | 0.2 | n/a | +| net_arch | [64, 64] | [64, 64] | + +No learning rate scheduling — constant 3e-4. + +**VecNormalize setup:** +```python +def _make_env(plant, disturbance, env_kwargs): + def _init(): + return SurfaceDiscoveryEnv(plant=plant, disturbance=disturbance, **(env_kwargs or {})) + return _init + +env = DummyVecEnv([_make_env(plant, disturbance, env_kwargs) for _ in range(n_envs)]) +env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0) +``` + +**Monitoring:** Custom `TrainingMonitor` callback that evaluates on an **unnormalized** eval env every `eval_freq` steps, tracking both cumulative reward and ISE. When `save_best=True`, also uses SB3's `EvalCallback` to auto-save the best model by mean reward. 
+ +**Output:** +```python +@dataclass +class TrainingResult: + model_path: str # .zip (final model) + vecnorm_path: str # .pkl (normalization stats) + best_model_path: str | None # best model path (None only when save_best=False) + reward_history: list[float] # per-eval rewards + ise_history: list[float] # per-eval ISE values + final_reward: float + final_ise: float + training_time: float + algorithm: str + plant: str + disturbance: str +``` + +**File naming:** `{algo}_{plant}_{disturbance}.zip` / `{algo}_{plant}_{disturbance}_vecnorm.pkl` + +**Batch helper:** +```python +results = train_all_surfaces( + plants=[...], algorithms=[...], disturbances=[...], + total_timesteps=500_000, output_dir="trained_models/", + base_seed=42, # each run gets base_seed + index for reproducibility +) +# 5 plants x 2 algos x 4 disturbances = 40 models +``` + +Seed propagation: `train_all_surfaces` assigns `seed = base_seed + i` where `i` is the flat index across the (plant, algorithm, disturbance) grid. Seeds are logged in each `TrainingResult` for reproducibility. + +## Component 3: Benchmark + +Orchestrates RL vs all controllers across all plants. + +**Interface:** +```python +results = benchmark( + plants=["double_integrator", "inverted_pendulum", "crane", "quadrotor", "pmsm"], + trained_models_dir="trained_models/", + algorithms=["PPO", "SAC"], + disturbances=["none", "constant", "sinusoidal", "step"], + T=5.0, dt=0.01, + fixed_gains={"K": 5, "lam": 3, "phi": 0.05}, + include_matched=True, + output_dir="benchmark_results/", +) +``` + +### Table 1 — Fixed switching law (240 simulations) + +12 surfaces x 5 plants x 4 disturbances. All use `u = -K*sat(s/phi) - lam*s` with K=5, lam=3, phi=0.05. Isolates surface quality independent of controller design. + +**The 12 surfaces:** +1. `LinearSurface` +2. `TerminalSurface` +3. `NonsingularTerminalSurface` +4. `FastTerminalSurface` +5. `IntegralSlidingSurface` +6. `IntegralTerminalSurface` +7. `HierarchicalSurface` +8. `PIDSurface` +9. 
`GlobalSurface` +10. `PredefinedTimeSurface` +11. `NonlinearDampingSurface` +12. `RLDiscoveredSurface` (loaded from trained model) + +### Table 2 — Best-matched controller (340 simulations) + +17 controllers x 5 plants x 4 disturbances. Each controller uses its best-matched surface via OpenSMC's composition API. + +**The 17 controller configurations:** + +OpenSMC uses composition: most controllers accept a `surface=` parameter. The benchmark constructs each pairing explicitly via a factory. + +| # | Controller class | Surface | Composition | +|---|-----------------|---------|-------------| +| 1 | `ClassicalSMC` | `LinearSurface` | `ClassicalSMC(surface=LinearSurface(c=10))` | +| 2 | `AdaptiveSMC` | `NonlinearDampingSurface` | `AdaptiveSMC(surface=NonlinearDampingSurface(...))` | +| 3 | `DynamicSMC` | `LinearSurface` | `DynamicSMC(surface=LinearSurface(c=10))` | +| 4 | `ITSMC` | `IntegralTerminalSurface` | `ITSMC(surface=IntegralTerminalSurface(...))` | +| 5 | `NFTSMC` | `NonsingularTerminalSurface` | `NFTSMC(surface=NonsingularTerminalSurface(...))` | +| 6 | `FixedTimeSMC` | `PredefinedTimeSurface` | `FixedTimeSMC(surface=PredefinedTimeSurface(...))` | +| 7 | `FuzzySMC` | `LinearSurface` | `FuzzySMC(surface=LinearSurface(c=10))` | +| 8 | `DiscreteSMC` | `LinearSurface` | `DiscreteSMC(surface=LinearSurface(c=10))` | +| 9 | `CombiningHSMC` | `HierarchicalSurface` | `CombiningHSMC(surface=HierarchicalSurface(...))` | +| 10 | `AggregatedHSMC` | `HierarchicalSurface` | `AggregatedHSMC(surface=HierarchicalSurface(...))` | +| 11 | `IncrementalHSMC` | `HierarchicalSurface` | `IncrementalHSMC(surface=HierarchicalSurface(...))` | +| 12 | `TwistingSMC` | `LinearSurface` | `TwistingSMC(surface=LinearSurface(c=10))` | +| 13 | `QuasiContinuous2SMC` | `LinearSurface` | `QuasiContinuous2SMC(surface=LinearSurface(c=10))` | +| 14 | `NestedHOSMC` | n/a | `NestedHOSMC(order=2)` (standalone) | +| 15 | `QuasiContinuousHOSMC` | n/a | `QuasiContinuousHOSMC(order=2)` (standalone) | +| 16 | 
`PID` | n/a | `PID(Kp=10, Kd=5, Ki=0.5)` (standalone) | +| 17 | `LQR` | n/a | `LQR(A, B)` (standalone, plant-specific A/B) | + +**Missing model handling:** If a trained model file is not found for a given (algorithm, plant, disturbance) combination, that RL entry is skipped with a warning logged. The benchmark still runs all classical controllers. This allows partial benchmarks (e.g., after training only PPO models). + +**Metrics per simulation:** ISE, ITAE, settling time, overshoot, chattering index, steady-state error. + +**Output:** +```python +@dataclass +class BenchmarkResults: + table1: pd.DataFrame # index: (surface, plant, disturbance) → metrics + table2: pd.DataFrame # index: (controller, plant, disturbance) → metrics + fingerprints: dict # {(algo, plant, disturbance): fingerprint_vector} + rankings: dict # {metric: ranked list of (surface/controller, score)} + + def to_latex(self) -> str # Paper-ready LaTeX tables + def to_json(self, path: str) # Machine-readable export + def summary(self) -> str # Console-friendly summary +``` + +Total: 580 simulations, ~2 minutes wall time. + +## Component 4: Visualize + +Pure matplotlib, no GUI dependencies, publication-quality defaults. + +**Functions:** +1. `surface_heatmap(model_path, plant, ...)` — sigma(e, edot) grid +2. `contour_overlay(model_path, reference_surfaces, ...)` — RL vs classical s=0 contours +3. `radar_chart(fingerprint_result)` — similarity to 10 known fingerprinting defaults +4. `cross_plant_radar(fingerprints_by_plant)` — side-by-side per plant +5. `benchmark_bars(benchmark_results, metric, plant)` — bar chart for all 12 surfaces (Table 1) or 17 controllers (Table 2) +6. `training_curve(training_result)` — reward + ISE over steps +7. `time_domain(benchmark_results, plant, top_n, disturbance)` — state/control trajectories + +The radar chart uses the 10 fingerprinting defaults from `fingerprinting.get_default_known_surfaces()`. 
The benchmark bar charts show all 12 surfaces (Table 1) or all 17 controllers (Table 2). These are different views — fingerprinting answers "what did RL learn?", benchmarking answers "how well does it perform?". + +All return `matplotlib.Figure`. Optional `save_path` for PNG/PDF export. + +## Changes to Existing Files + +1. **`opensmc/rl/rl_surface.py`:** + - `_load_sb3`: try `PPO.load()`, then `SAC.load()`, raise if neither works + - `_make_obs`: detect 4D models (trained on SurfaceDiscoveryEnv) and pad `[e, edot, 0.0, 0.0]` — zeros for sigma_prev and |u_prev| following autosmc's static extraction pattern + +2. **`opensmc/rl/__init__.py`:** Expand public API with new imports. + +3. **`pyproject.toml`:** Add `pandas>=1.5` to `[rl]` optional dependencies. Pandas is used by `benchmark.py` for results DataFrames. Users who only want training without benchmarking still get pandas — acceptable trade-off for a flat dependency surface. + +## Testing + +| Test file | Count | Coverage | +|-----------|-------|---------| +| `test_discovery_env.py` | ~15 | Gym API, all 5 plants, rewards, truncation, disturbances, PMSM edot | +| `test_trainer.py` | ~10 | PPO/SAC (5K steps), model save/load, VecNormalize, seed reproducibility | +| `test_benchmark.py` | ~10 | Fixed-law, matched-controller factory, metrics, missing model skip, LaTeX | +| `test_visualize.py` | ~7 | Each function returns Figure, save_path works | + +Training tests use 5K steps (fast). Benchmark tests use 2 surfaces x 1 plant. All run without GPU. 
+ +## New Examples + +| File | Purpose | +|------|---------| +| `examples/train_and_fingerprint.py` | Train PPO -> fingerprint -> radar chart | +| `examples/full_benchmark.py` | Full benchmark -> LaTeX tables | + +## LOC Estimate + +| Component | LOC | +|-----------|-----| +| discovery_env.py | ~250 | +| trainer.py | ~200 | +| benchmark.py | ~350 | +| visualize.py | ~250 | +| rl_surface.py edits | ~20 | +| __init__.py + pyproject.toml | ~12 | +| Tests (4 files) | ~300 | +| Examples (2 files) | ~100 | +| **Total** | **~1,480** | + +## Source Lineage + +Code is ported from `autosmc/` (D:/Ali_Kufa_University/Journals Paper/RL-Discovered-Sliding-Surfaces/autosmc/) and adapted to OpenSMC's class hierarchy. No code is imported at runtime — OpenSMC remains self-contained. From 9bf970bdef9de29336183194f6e146873518ca80 Mon Sep 17 00:00:00 2001 From: a2z Date: Thu, 19 Mar 2026 11:18:51 +0300 Subject: [PATCH 02/10] docs: add Phase D implementation plan (10 tasks, TDD) Fixes from review: IntegralSlidingSurface params, Quadrotor disturbance format, exact switching law in benchmark, pytest skip patterns, Gym registration, unused import removed. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../2026-03-19-phase-d-rl-integration.md | 2214 +++++++++++++++++ 1 file changed, 2214 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-19-phase-d-rl-integration.md diff --git a/docs/superpowers/plans/2026-03-19-phase-d-rl-integration.md b/docs/superpowers/plans/2026-03-19-phase-d-rl-integration.md new file mode 100644 index 0000000..9eb8862 --- /dev/null +++ b/docs/superpowers/plans/2026-03-19-phase-d-rl-integration.md @@ -0,0 +1,2214 @@ +# Phase D: RL Integration — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Add full RL training pipeline, benchmark suite, and visualization to OpenSMC's Python package so users can train RL-discovered sliding surfaces and compare them against all 17 classical controllers. + +**Architecture:** Port autosmc's proven SurfaceDiscoveryEnv and training pipeline into `opensmc/rl/`, adapting them to use OpenSMC's Plant/Controller/Surface class hierarchy. Add a benchmark module that runs 580 simulations (12 surfaces x 5 plants + 17 controllers x 5 plants, each across 4 disturbance types) and a visualization module for publication-quality figures. + +**Tech Stack:** Python 3.9+, NumPy, Gymnasium 0.29+, Stable-Baselines3 2.0+, PyTorch 2.0+, pandas 1.5+, matplotlib 3.5+ + +**Spec:** `docs/superpowers/specs/2026-03-19-phase-d-rl-integration-design.md` + +--- + +## File Map + +| Action | Path | Responsibility | +|--------|------|----------------| +| CREATE | `opensmc/rl/discovery_env.py` | SurfaceDiscoveryEnv — Gym env where RL agent outputs sigma | +| CREATE | `opensmc/rl/trainer.py` | PPO/SAC training with VecNormalize, callbacks, batch training | +| CREATE | `opensmc/rl/benchmark.py` | 580-simulation benchmark, two tables, LaTeX export | +| CREATE | `opensmc/rl/visualize.py` | 7 plotting functions (heatmap, radar, contours, bars, curves) | +| MODIFY | `opensmc/rl/rl_surface.py:57-87` | SAC loading fallback + 4D obs padding | +| MODIFY | `opensmc/rl/__init__.py` | Expand public API | +| MODIFY | `pyproject.toml:18` | Add pandas to [rl] extras | +| CREATE | `tests/test_discovery_env.py` | ~15 tests for SurfaceDiscoveryEnv | +| CREATE | `tests/test_trainer.py` | ~10 tests for train_surface | +| CREATE | `tests/test_benchmark.py` | ~10 tests for benchmark | +| CREATE | `tests/test_visualize.py` | ~7 tests for visualization | +| CREATE | `examples/train_and_fingerprint.py` | End-to-end: train -> fingerprint -> radar | +| CREATE | `examples/full_benchmark.py` | Full benchmark -> LaTeX tables | + +All paths relative to `D:/OpenSMC/python/`. 
+ +--- + +## Task 1: SurfaceDiscoveryEnv — Tests + +**Files:** +- Create: `tests/test_discovery_env.py` + +- [ ] **Step 1: Write test file with all 15 tests** + +```python +"""Tests for SurfaceDiscoveryEnv — Gymnasium environment for RL surface discovery.""" + +import numpy as np +import pytest + +# Mark all tests as requiring rl extras +gymnasium = pytest.importorskip("gymnasium") + + +class TestSurfaceDiscoveryEnvCreation: + """Test environment creation with different plants.""" + + @pytest.mark.parametrize("plant", [ + "double_integrator", "inverted_pendulum", "crane", "quadrotor", "pmsm" + ]) + def test_create_env(self, plant): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant=plant) + assert env.observation_space.shape == (4,) + assert env.action_space.shape == (1,) + env.close() + + def test_create_with_plant_instance(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + from opensmc.plants import DoubleIntegrator + env = SurfaceDiscoveryEnv(plant=DoubleIntegrator()) + assert env.observation_space.shape == (4,) + env.close() + + def test_invalid_plant_string(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + with pytest.raises(ValueError, match="Unknown plant"): + SurfaceDiscoveryEnv(plant="nonexistent") + + +class TestSurfaceDiscoveryEnvGymAPI: + """Test Gymnasium API compliance.""" + + def test_reset_returns_obs_info(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator") + obs, info = env.reset(seed=42) + assert obs.shape == (4,) + assert isinstance(info, dict) + env.close() + + def test_step_returns_five_tuple(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator") + env.reset(seed=42) + action = env.action_space.sample() + obs, reward, terminated, truncated, info = env.step(action) + assert obs.shape == (4,) + assert isinstance(reward, float) + assert 
isinstance(terminated, bool) + assert isinstance(truncated, bool) + assert isinstance(info, dict) + env.close() + + def test_episode_terminates(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator", max_steps=10) + env.reset(seed=42) + done = False + steps = 0 + while not done: + obs, reward, terminated, truncated, info = env.step(np.array([0.0])) + done = terminated or truncated + steps += 1 + assert steps <= 10 + env.close() + + def test_obs_within_bounds(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator") + obs, _ = env.reset(seed=42) + for _ in range(50): + obs, _, term, trunc, _ = env.step(env.action_space.sample()) + if term or trunc: + break + assert np.all(obs >= env.observation_space.low) + assert np.all(obs <= env.observation_space.high) + env.close() + + +class TestSurfaceDiscoveryEnvReward: + """Test reward computation.""" + + def test_zero_action_gives_negative_reward(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator") + env.reset(seed=42) + _, reward, _, _, _ = env.step(np.array([0.0])) + # Reward should be negative (error penalty) + assert reward < 0 + env.close() + + def test_convergence_bonus(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator") + env.reset(seed=42) + # Force state near zero for convergence bonus + env.state = np.array([env.ref + 0.001, 0.001]) + env.sigma_prev = 0.0 + env.u_prev = 0.0 + _, reward, _, _, _ = env.step(np.array([0.0])) + # Should include convergence bonus + assert reward > -0.1 # near-zero error + bonus + env.close() + + +class TestSurfaceDiscoveryEnvDisturbances: + """Test disturbance types.""" + + @pytest.mark.parametrize("disturbance", ["none", "constant", "sinusoidal", "step"]) + def test_disturbance_type(self, disturbance): + from 
opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv( + plant="double_integrator", disturbance=disturbance, + disturbance_amplitude=1.0 + ) + obs, _ = env.reset(seed=42) + obs2, _, _, _, _ = env.step(np.array([0.5])) + assert obs2.shape == (4,) + env.close() + + +class TestSurfaceDiscoveryEnvPMSM: + """Test PMSM-specific edot computation.""" + + def test_pmsm_edot_computed(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="pmsm") + obs, _ = env.reset(seed=42) + # edot should be non-zero (computed from dynamics) + obs2, _, _, _, info = env.step(np.array([0.5])) + # obs[1] is edot — should be finite + assert np.isfinite(obs2[1]) + env.close() + + +class TestSurfaceDiscoveryEnvControlLaw: + """Test internal control law.""" + + def test_control_gains(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv( + plant="double_integrator", + control_gains={"K": 10, "lam": 5, "phi": 0.1} + ) + assert env.K == 10 + assert env.lam == 5 + assert env.phi == 0.1 + env.close() + + def test_sigma_scaling(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator", sigma_max=30.0) + env.reset(seed=42) + # Action of 1.0 should map to sigma_max + # (internal scaling: sigma = action * sigma_max) + obs, _, _, _, info = env.step(np.array([1.0])) + assert "sigma" in info + assert abs(info["sigma"] - 30.0) < 1e-6 + env.close() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd D:/OpenSMC/python && python -m pytest tests/test_discovery_env.py -v --no-header 2>&1 | head -30` +Expected: FAIL — `ModuleNotFoundError: No module named 'opensmc.rl.discovery_env'` + +- [ ] **Step 3: Commit test file** + +```bash +cd D:/OpenSMC/python && git add tests/test_discovery_env.py +git commit -m "test: add SurfaceDiscoveryEnv tests (red phase)" +``` + +--- + +## Task 2: SurfaceDiscoveryEnv — Implementation + 
+**Files:** +- Create: `opensmc/rl/discovery_env.py` + +Port from `autosmc/envs/discovery_env.py`, adapt to OpenSMC plants. + +- [ ] **Step 1: Write discovery_env.py** + +```python +"""SurfaceDiscoveryEnv — Gymnasium environment for RL surface discovery. + +The RL agent outputs the sliding variable sigma directly. A fixed switching +control law converts sigma to control input u. This isolates surface quality +from controller design, enabling fair comparison with classical surfaces. + +Ported from autosmc/envs/discovery_env.py, adapted to OpenSMC Plant classes. +""" + +import numpy as np + +try: + import gymnasium as gym + from gymnasium import spaces +except ImportError: + raise ImportError( + "gymnasium is required for SurfaceDiscoveryEnv. " + "Install with: pip install opensmc[rl]" + ) + +from opensmc.plants import ( + DoubleIntegrator, InvertedPendulum, SinglePendulumCrane, + Quadrotor6DOF, PMSM, +) + +PLANT_REGISTRY = { + "double_integrator": DoubleIntegrator, + "inverted_pendulum": InvertedPendulum, + "crane": SinglePendulumCrane, + "quadrotor": Quadrotor6DOF, + "pmsm": PMSM, +} + +# (e, edot) extraction from full state for each plant. +# Returns (e, edot) given (state, ref). +# For PMSM, edot is computed separately via _pmsm_edot(). +_ERROR_EXTRACTORS = { + "double_integrator": lambda x, ref: (x[0] - ref, x[1]), + "inverted_pendulum": lambda x, ref: (x[2], x[3]), + "crane": lambda x, ref: (x[0] - ref, x[1]), + "quadrotor": lambda x, ref: (x[2] - ref, x[8]), + "pmsm": lambda x, ref: (x[2] - ref, None), # edot computed separately +} + +# Default initial state ranges for reset randomization. +_RESET_RANGES = { + "double_integrator": {"pos": (-2, 2), "vel": (-1, 1)}, + "inverted_pendulum": {"pos": (-0.5, 0.5), "vel": (-0.3, 0.3)}, + "crane": {"pos": (-0.5, 0.5), "vel": (-0.3, 0.3)}, + "quadrotor": {"pos": (-1, 1), "vel": (-0.5, 0.5)}, + "pmsm": {"pos": (-5, 5), "vel": (-1, 1)}, +} + +# Default reference values for each plant. 
+_DEFAULT_REFS = { + "double_integrator": 1.0, + "inverted_pendulum": 0.0, + "crane": 0.5, + "quadrotor": 1.0, + "pmsm": 100.0, +} + + +def _pmsm_edot(plant, state): + """Compute PMSM angular acceleration from dynamics. + + domega/dt = (Te - B*omega - TL) / J + where Te = 1.5 * pp * psi_f * i_q (non-salient pole) + """ + i_q = state[1] + omega = state[2] + Te = 1.5 * plant.pp * plant.psi_f * i_q + return (Te - plant.B * omega - plant.TL) / plant.J + + +class SurfaceDiscoveryEnv(gym.Env): + """Gymnasium environment where RL agent discovers sliding surfaces. + + The agent outputs sigma (the sliding variable) at each step. + A fixed switching control law u = -K*sat(sigma/phi) - lam*sigma + converts sigma to control input. The reward penalizes tracking + error and control effort while rewarding convergence. + + Parameters + ---------- + plant : str or Plant + Plant name (key in PLANT_REGISTRY) or Plant instance. + disturbance : str + Disturbance type: "none", "constant", "sinusoidal", "step". + disturbance_amplitude : float + Amplitude of the disturbance signal. + dt : float + Integration time step. + max_steps : int + Maximum episode length. + sigma_max : float + Action clipping: agent output in [-1,1] is scaled to [-sigma_max, sigma_max]. + control_gains : dict + Keys: "K", "lam", "phi" for the switching control law. + ref : float or None + Reference value. If None, uses plant-specific default. + """ + + metadata = {"render_modes": []} + + def __init__( + self, + plant="double_integrator", + disturbance="none", + disturbance_amplitude=1.0, + dt=0.01, + max_steps=500, + sigma_max=20.0, + control_gains=None, + ref=None, + rk4_substeps=4, + ): + super().__init__() + + # Resolve plant + if isinstance(plant, str): + if plant not in PLANT_REGISTRY: + raise ValueError( + f"Unknown plant '{plant}'. 
" + f"Available: {list(PLANT_REGISTRY.keys())}" + ) + self.plant_name = plant + self.plant = PLANT_REGISTRY[plant]() + else: + self.plant = plant + # Find name by class match + self.plant_name = next( + (k for k, v in PLANT_REGISTRY.items() + if isinstance(plant, v)), + "custom" + ) + + # Environment parameters + self.disturbance_type = disturbance + self.disturbance_amplitude = disturbance_amplitude + self.dt = dt + self.max_steps = max_steps + self.sigma_max = sigma_max + self.rk4_substeps = rk4_substeps + self.sub_dt = dt / rk4_substeps + + # Control gains + gains = control_gains or {} + self.K = gains.get("K", 5.0) + self.lam = gains.get("lam", 3.0) + self.phi = gains.get("phi", 0.05) + + # Reference + self.ref = ref if ref is not None else _DEFAULT_REFS.get(self.plant_name, 1.0) + + # Error extractor + self._extract_error = _ERROR_EXTRACTORS.get(self.plant_name) + if self._extract_error is None: + raise ValueError(f"No error extractor for plant '{self.plant_name}'") + + # Spaces + obs_low = np.array([-10.0, -10.0, -50.0, 0.0], dtype=np.float32) + obs_high = np.array([10.0, 10.0, 50.0, 100.0], dtype=np.float32) + self.observation_space = spaces.Box(obs_low, obs_high, dtype=np.float32) + self.action_space = spaces.Box( + low=-1.0, high=1.0, shape=(1,), dtype=np.float32 + ) + + # State (set in reset) + self.state = None + self.sigma_prev = 0.0 + self.u_prev = 0.0 + self.step_count = 0 + self.t = 0.0 + + def _get_error(self, state): + """Extract (e, edot) from full state.""" + e, edot = self._extract_error(state, self.ref) + if edot is None: + # PMSM: compute edot from dynamics + edot = _pmsm_edot(self.plant, state) + return float(e), float(edot) + + def _get_disturbance(self, t): + """Compute disturbance signal at time t.""" + amp = self.disturbance_amplitude + if self.disturbance_type == "none": + return 0.0 + elif self.disturbance_type == "constant": + return amp + elif self.disturbance_type == "sinusoidal": + return amp * np.sin(2 * np.pi * t) + elif 
self.disturbance_type == "step": + return amp if t > 1.0 else 0.0 + return 0.0 + + def _format_disturbance(self, d_scalar): + """Convert scalar disturbance to plant-specific format. + + Quadrotor6DOF expects d as ndarray(12,) — disturbance is applied + to the z-acceleration channel (index 8). All other plants handle + scalar d natively. + """ + if self.plant_name == "quadrotor": + d = np.zeros(12) + d[8] = d_scalar # affect z acceleration + return d + return d_scalar + + def _rk4_step(self, t, x, u, d_scalar): + """Single RK4 step of plant dynamics.""" + d = self._format_disturbance(d_scalar) + # Quadrotor expects 4-element u; env controls altitude only + if self.plant_name == "quadrotor": + u_full = np.array([u, 0.0, 0.0, 0.0]) + elif self.plant_name == "pmsm": + u_full = np.array([0.0, u]) # v_d=0, v_q=u + else: + u_full = u + f = lambda t_, x_: np.array( + self.plant.dynamics(t_, x_, u_full, d), dtype=np.float64 + ) + dt = self.sub_dt + k1 = f(t, x) + k2 = f(t + dt / 2, x + dt / 2 * k1) + k3 = f(t + dt / 2, x + dt / 2 * k2) + k4 = f(t + dt, x + dt * k3) + return x + (dt / 6) * (k1 + 2 * k2 + 2 * k3 + k4) + + def reset(self, seed=None, options=None): + super().reset(seed=seed) + + # Randomize initial state + ranges = _RESET_RANGES.get(self.plant_name, {"pos": (-1, 1), "vel": (-0.5, 0.5)}) + n = self.plant.n_states + x0 = np.zeros(n, dtype=np.float64) + + if self.plant_name == "double_integrator": + x0[0] = self.np_random.uniform(*ranges["pos"]) + x0[1] = self.np_random.uniform(*ranges["vel"]) + elif self.plant_name == "inverted_pendulum": + x0[2] = self.np_random.uniform(*ranges["pos"]) + x0[3] = self.np_random.uniform(*ranges["vel"]) + elif self.plant_name == "crane": + x0[0] = self.np_random.uniform(*ranges["pos"]) + x0[1] = self.np_random.uniform(*ranges["vel"]) + elif self.plant_name == "quadrotor": + x0[2] = self.np_random.uniform(*ranges["pos"]) + x0[8] = self.np_random.uniform(*ranges["vel"]) + elif self.plant_name == "pmsm": + x0[2] = self.ref + 
self.np_random.uniform(*ranges["pos"]) + x0[1] = self.np_random.uniform(*ranges["vel"]) + + self.state = x0 + self.sigma_prev = 0.0 + self.u_prev = 0.0 + self.step_count = 0 + self.t = 0.0 + + e, edot = self._get_error(self.state) + obs = self._build_obs(e, edot) + return obs, {"e": e, "edot": edot} + + def step(self, action): + sigma = float(action[0]) * self.sigma_max + + # Control law: u = -K * sat(sigma/phi) - lam * sigma + sat_s = np.clip(sigma / self.phi, -1.0, 1.0) + u = -self.K * sat_s - self.lam * sigma + du = u - self.u_prev + + # Disturbance + d = self._get_disturbance(self.t) + + # Integrate with RK4 substeps + x = self.state.copy() + for _ in range(self.rk4_substeps): + x = self._rk4_step(self.t, x, u, d) + self.t += self.sub_dt + self.state = x + + # Error + e, edot = self._get_error(self.state) + + # Reward + reward = -1.0 * e**2 - 0.01 * u**2 - 0.005 * du**2 + + # Convergence bonus + if abs(e) < 0.02 and abs(edot) < 0.05: + reward += 5.0 * self.dt + + # Update history + self.sigma_prev = sigma + self.u_prev = u + self.step_count += 1 + + # Truncation (state diverged) + truncated = abs(e) > 10.0 or abs(edot) > 20.0 + if truncated: + reward -= 50.0 + + # Termination (episode length) + terminated = False + if self.step_count >= self.max_steps: + truncated = True + + obs = self._build_obs(e, edot) + info = {"e": e, "edot": edot, "sigma": sigma, "u": u, "t": self.t} + return obs, float(reward), terminated, truncated, info + + def _build_obs(self, e, edot): + """Build clipped observation vector.""" + obs = np.array([ + e, edot, self.sigma_prev, abs(self.u_prev) + ], dtype=np.float32) + return np.clip(obs, self.observation_space.low, self.observation_space.high) + + +# Register with Gymnasium +try: + if "OpenSMC/SurfaceDiscovery-v0" not in gym.registry: + gym.register( + id="OpenSMC/SurfaceDiscovery-v0", + entry_point="opensmc.rl.discovery_env:SurfaceDiscoveryEnv", + ) +except Exception: + pass +``` + +- [ ] **Step 2: Run tests to verify they pass** + 
+Run: `cd D:/OpenSMC/python && python -m pytest tests/test_discovery_env.py -v --no-header` +Expected: 15 PASSED + +- [ ] **Step 3: Commit** + +```bash +cd D:/OpenSMC/python && git add opensmc/rl/discovery_env.py +git commit -m "feat: add SurfaceDiscoveryEnv for RL surface discovery" +``` + +--- + +## Task 3: Trainer — Tests + +**Files:** +- Create: `tests/test_trainer.py` + +- [ ] **Step 1: Write test file** + +```python +"""Tests for RL surface trainer — PPO/SAC training pipeline.""" + +import os +import tempfile +import numpy as np +import pytest + +pytest.importorskip("stable_baselines3") + + +class TestTrainSurface: + """Test train_surface function.""" + + def test_ppo_training_completes(self, tmp_path): + from opensmc.rl.trainer import train_surface + result = train_surface( + plant="double_integrator", algorithm="PPO", + disturbance="none", total_timesteps=2048, + n_envs=1, output_dir=str(tmp_path), seed=42, + ) + assert os.path.exists(result.model_path) + assert os.path.exists(result.vecnorm_path) + assert result.algorithm == "PPO" + assert result.plant == "double_integrator" + assert result.training_time > 0 + + def test_sac_training_completes(self, tmp_path): + from opensmc.rl.trainer import train_surface + result = train_surface( + plant="double_integrator", algorithm="SAC", + disturbance="none", total_timesteps=2048, + n_envs=1, output_dir=str(tmp_path), seed=42, + ) + assert os.path.exists(result.model_path) + assert result.algorithm == "SAC" + + def test_model_file_naming(self, tmp_path): + from opensmc.rl.trainer import train_surface + result = train_surface( + plant="inverted_pendulum", algorithm="PPO", + disturbance="sinusoidal", total_timesteps=2048, + n_envs=1, output_dir=str(tmp_path), seed=42, + ) + assert "PPO_inverted_pendulum_sinusoidal" in result.model_path + + def test_reward_history_populated(self, tmp_path): + from opensmc.rl.trainer import train_surface + result = train_surface( + plant="double_integrator", algorithm="PPO", + 
disturbance="none", total_timesteps=5000, + n_envs=1, output_dir=str(tmp_path), + eval_freq=2000, seed=42, + ) + assert len(result.reward_history) >= 1 + assert len(result.ise_history) >= 1 + + def test_invalid_algorithm_raises(self, tmp_path): + from opensmc.rl.trainer import train_surface + with pytest.raises(ValueError, match="Unknown algorithm"): + train_surface( + plant="double_integrator", algorithm="TD3", + total_timesteps=1000, output_dir=str(tmp_path), + ) + + def test_vecnorm_stats_saved(self, tmp_path): + from opensmc.rl.trainer import train_surface + result = train_surface( + plant="double_integrator", algorithm="PPO", + disturbance="none", total_timesteps=2048, + n_envs=1, output_dir=str(tmp_path), seed=42, + ) + assert result.vecnorm_path.endswith(".pkl") + assert os.path.getsize(result.vecnorm_path) > 0 + + +class TestTrainAllSurfaces: + """Test batch training.""" + + def test_trains_multiple_models(self, tmp_path): + from opensmc.rl.trainer import train_all_surfaces + results = train_all_surfaces( + plants=["double_integrator"], + algorithms=["PPO"], + disturbances=["none", "constant"], + total_timesteps=2048, + output_dir=str(tmp_path), + n_envs=1, base_seed=42, + ) + assert len(results) == 2 + assert all(os.path.exists(r.model_path) for r in results) + + def test_seed_propagation(self, tmp_path): + from opensmc.rl.trainer import train_all_surfaces + results = train_all_surfaces( + plants=["double_integrator"], + algorithms=["PPO"], + disturbances=["none", "constant"], + total_timesteps=2048, + output_dir=str(tmp_path), + n_envs=1, base_seed=42, + ) + # Seeds should differ between runs + assert results[0].model_path != results[1].model_path + + +class TestTrainedModelLoading: + """Test that trained models can be loaded as RLDiscoveredSurface.""" + + def test_load_trained_model(self, tmp_path): + from opensmc.rl.trainer import train_surface + from opensmc.rl import RLDiscoveredSurface + + result = train_surface( + plant="double_integrator", 
algorithm="PPO", + disturbance="none", total_timesteps=2048, + n_envs=1, output_dir=str(tmp_path), seed=42, + ) + surface = RLDiscoveredSurface( + result.model_path, + obs_fn=lambda e, ed, t: np.array([e, ed, 0.0, 0.0], dtype=np.float32) + ) + s = surface.compute(1.0, 0.5, 0.0) + assert np.isfinite(s) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd D:/OpenSMC/python && python -m pytest tests/test_trainer.py -v --no-header 2>&1 | head -20` +Expected: FAIL — `ModuleNotFoundError: No module named 'opensmc.rl.trainer'` + +- [ ] **Step 3: Commit test file** + +```bash +cd D:/OpenSMC/python && git add tests/test_trainer.py +git commit -m "test: add trainer tests (red phase)" +``` + +--- + +## Task 4: Trainer — Implementation + +**Files:** +- Create: `opensmc/rl/trainer.py` + +- [ ] **Step 1: Write trainer.py** + +```python +"""RL Surface Trainer — PPO/SAC training pipeline with VecNormalize. + +Wraps stable-baselines3 with OpenSMC-specific defaults validated in the +autosmc subproject (12 trained models, 80+ figures). +""" + +import os +import time +from dataclasses import dataclass + +import numpy as np + +try: + from stable_baselines3 import PPO, SAC + from stable_baselines3.common.vec_env import DummyVecEnv + from stable_baselines3.common.vec_env import VecNormalize + from stable_baselines3.common.callbacks import BaseCallback, EvalCallback + from stable_baselines3.common.monitor import Monitor +except ImportError: + raise ImportError( + "stable-baselines3 is required for training. 
" + "Install with: pip install opensmc[rl]" + ) + +from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + + +@dataclass +class TrainingResult: + """Result of a single training run.""" + model_path: str + vecnorm_path: str + best_model_path: str | None + reward_history: list + ise_history: list + final_reward: float + final_ise: float + training_time: float + algorithm: str + plant: str + disturbance: str + seed: int + + +class _ISECallback(BaseCallback): + """Custom callback that tracks reward and ISE on unnormalized eval env.""" + + def __init__(self, eval_env, check_freq=10_000, verbose=0): + super().__init__(verbose) + self.eval_env = eval_env + self.check_freq = check_freq + self.rewards = [] + self.ises = [] + + def _on_step(self): + if self.n_calls % self.check_freq == 0: + obs, _ = self.eval_env.reset(seed=42) + total_reward = 0.0 + errors_sq = [] + for _ in range(self.eval_env.max_steps): + action, _ = self.model.predict(obs, deterministic=True) + obs, reward, terminated, truncated, info = self.eval_env.step(action) + total_reward += reward + errors_sq.append(info["e"] ** 2) + if terminated or truncated: + break + ise = sum(errors_sq) * self.eval_env.dt + self.rewards.append(total_reward) + self.ises.append(ise) + if self.verbose: + print(f" Step {self.n_calls:>7d} | reward={total_reward:8.2f} | ISE={ise:.6f}") + return True + + +def _make_env(plant, disturbance, env_kwargs): + """Factory for creating SurfaceDiscoveryEnv instances.""" + def _init(): + kw = dict(env_kwargs) if env_kwargs else {} + return SurfaceDiscoveryEnv(plant=plant, disturbance=disturbance, **kw) + return _init + + +def train_surface( + plant="double_integrator", + algorithm="PPO", + disturbance="none", + total_timesteps=500_000, + n_envs=4, + net_arch=None, + seed=42, + output_dir="trained_models", + eval_freq=10_000, + save_best=True, + normalize_obs=True, + normalize_reward=True, + clip_obs=10.0, + env_kwargs=None, +): + """Train an RL agent to discover a sliding surface. 
+ + Parameters + ---------- + plant : str + Plant name (see PLANT_REGISTRY in discovery_env). + algorithm : str + "PPO" or "SAC". + disturbance : str + "none", "constant", "sinusoidal", or "step". + total_timesteps : int + Total training steps. + n_envs : int + Number of parallel training environments. + net_arch : list or None + MLP hidden layer sizes. Default [64, 64]. + seed : int + Random seed. + output_dir : str + Directory to save model and stats. + eval_freq : int + Evaluate every N steps. + save_best : bool + Save best model by mean reward. + normalize_obs, normalize_reward : bool + VecNormalize settings. + clip_obs : float + Observation clipping for VecNormalize. + env_kwargs : dict or None + Extra kwargs for SurfaceDiscoveryEnv. + + Returns + ------- + TrainingResult + """ + if algorithm not in ("PPO", "SAC"): + raise ValueError(f"Unknown algorithm '{algorithm}'. Use 'PPO' or 'SAC'.") + + if net_arch is None: + net_arch = [64, 64] + + os.makedirs(output_dir, exist_ok=True) + tag = f"{algorithm}_{plant}_{disturbance}" + + # Training env: vectorized + normalized + env = DummyVecEnv([_make_env(plant, disturbance, env_kwargs) for _ in range(n_envs)]) + env = VecNormalize( + env, norm_obs=normalize_obs, norm_reward=normalize_reward, clip_obs=clip_obs + ) + + # Eval env: unnormalized (for clean metrics) + eval_env = SurfaceDiscoveryEnv(plant=plant, disturbance=disturbance, **(env_kwargs or {})) + + # Callbacks + ise_cb = _ISECallback(eval_env, check_freq=eval_freq, verbose=1) + callbacks = [ise_cb] + + best_model_path = None + if save_best: + best_dir = os.path.join(output_dir, f"{tag}_best") + eval_cb = EvalCallback( + DummyVecEnv([_make_env(plant, disturbance, env_kwargs)]), + best_model_save_path=best_dir, + log_path=best_dir, + eval_freq=eval_freq, + deterministic=True, + render=False, + ) + callbacks.append(eval_cb) + best_model_path = os.path.join(best_dir, "best_model.zip") + + # Create model + policy_kwargs = {"net_arch": net_arch} + if algorithm == 
"PPO": + model = PPO( + "MlpPolicy", env, + learning_rate=3e-4, n_steps=512, batch_size=64, + n_epochs=10, gamma=0.99, gae_lambda=0.95, + clip_range=0.2, + policy_kwargs=policy_kwargs, + verbose=0, seed=seed, + ) + else: # SAC + model = SAC( + "MlpPolicy", env, + learning_rate=3e-4, batch_size=256, + buffer_size=100_000, gamma=0.99, + policy_kwargs=policy_kwargs, + verbose=0, seed=seed, + ) + + # Train + t0 = time.time() + model.learn(total_timesteps=total_timesteps, callback=callbacks) + elapsed = time.time() - t0 + + # Save + model_path = os.path.join(output_dir, f"{tag}.zip") + norm_path = os.path.join(output_dir, f"{tag}_vecnorm.pkl") + model.save(model_path) + env.save(norm_path) + + eval_env.close() + env.close() + + return TrainingResult( + model_path=model_path, + vecnorm_path=norm_path, + best_model_path=best_model_path if save_best and os.path.exists(best_model_path) else None, + reward_history=ise_cb.rewards, + ise_history=ise_cb.ises, + final_reward=ise_cb.rewards[-1] if ise_cb.rewards else 0.0, + final_ise=ise_cb.ises[-1] if ise_cb.ises else float("inf"), + training_time=elapsed, + algorithm=algorithm, + plant=plant, + disturbance=disturbance, + seed=seed, + ) + + +def train_all_surfaces( + plants=None, + algorithms=None, + disturbances=None, + total_timesteps=500_000, + output_dir="trained_models", + n_envs=4, + base_seed=42, + **kwargs, +): + """Train models for all (plant, algorithm, disturbance) combinations. + + Returns list of TrainingResult, one per combination. + Seed for run i = base_seed + i. 
+    """
+    if plants is None:
+        plants = ["double_integrator", "inverted_pendulum", "crane", "quadrotor", "pmsm"]
+    if algorithms is None:
+        algorithms = ["PPO", "SAC"]
+    if disturbances is None:
+        disturbances = ["none", "constant", "sinusoidal", "step"]
+
+    results = []
+    i = 0
+    for plant in plants:
+        for algo in algorithms:
+            for dist in disturbances:
+                result = train_surface(
+                    plant=plant, algorithm=algo, disturbance=dist,
+                    total_timesteps=total_timesteps,
+                    output_dir=output_dir, n_envs=n_envs,
+                    seed=base_seed + i, **kwargs,
+                )
+                results.append(result)
+                i += 1
+    return results
+```
+
+- [ ] **Step 2: Run tests to verify they pass**
+
+Run: `cd D:/OpenSMC/python && python -m pytest tests/test_trainer.py -v --no-header -x`
+Expected: 9 PASSED (training tests take ~30-60 seconds total at 2K-5K steps)
+
+- [ ] **Step 3: Commit**
+
+```bash
+cd D:/OpenSMC/python && git add opensmc/rl/trainer.py
+git commit -m "feat: add PPO/SAC surface training pipeline"
+```
+
+---
+
+## Task 5: RLDiscoveredSurface Fixes
+
+**Files:**
+- Modify: `opensmc/rl/rl_surface.py:57-87`
+
+- [ ] **Step 1: Write test for SAC loading + 4D obs**
+
+Add to existing `tests/test_rl_surface.py`:
+
+```python
+def test_load_sb3_sac_fallback():
+    """Callable model with an explicit 4D obs_fn (stand-in for SB3-loaded policies)."""
+    from opensmc.rl import RLDiscoveredSurface
+    # Test with callable that mimics 4D input
+    def model_4d(obs):
+        assert len(obs) == 4
+        return np.array([obs[0] + obs[1]])
+    surface = RLDiscoveredSurface(
+        model_4d,
+        obs_fn=lambda e, ed, t: np.array([e, ed, 0.0, 0.0], dtype=np.float32)
+    )
+    s = surface.compute(1.0, 0.5)
+    assert abs(s - 1.5) < 1e-6
+
+
+def test_4d_obs_padding():
+    """RLDiscoveredSurface pads to 4D when use_4d_obs=True."""
+    from opensmc.rl import RLDiscoveredSurface
+    obs_log = []
+    def model_fn(obs):
+        obs_log.append(obs.copy())
+        return np.array([obs[0] * 2])
+    surface = RLDiscoveredSurface(model_fn, use_4d_obs=True)
+    s = surface.compute(1.5, 0.3)
+    assert len(obs_log[-1]) == 4
+    assert 
obs_log[-1][2] == 0.0 # sigma_prev = 0 + assert obs_log[-1][3] == 0.0 # |u_prev| = 0 + assert abs(s - 3.0) < 1e-6 +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd D:/OpenSMC/python && python -m pytest tests/test_rl_surface.py -v -k "sac_fallback or 4d_obs" --no-header` +Expected: FAIL + +- [ ] **Step 3: Modify rl_surface.py — SAC fallback in _load_sb3** + +Replace lines 57-66: + +```python + @staticmethod + def _load_sb3(path): + """Load a Stable-Baselines3 model (PPO or SAC) from file.""" + try: + from stable_baselines3 import PPO, SAC + except ImportError: + raise ImportError( + "stable-baselines3 is required to load SB3 models. " + "Install with: pip install opensmc[rl]") + try: + return PPO.load(path) + except Exception: + return SAC.load(path) +``` + +- [ ] **Step 4: Modify rl_surface.py — add use_4d_obs parameter** + +Update `__init__` (line 43) to accept `use_4d_obs=False`: + +```python + def __init__(self, model, obs_fn=None, scale=1.0, use_4d_obs=False): + self.scale = scale + self._obs_fn = obs_fn + self._use_4d_obs = use_4d_obs + # ... 
rest unchanged +``` + +Update `_make_obs` (lines 73-87) to pad when `use_4d_obs=True`: + +```python + def _make_obs(self, e, edot, t): + """Build observation vector from error state.""" + if self._obs_fn is not None: + return self._obs_fn(e, edot, t) + + e_scalar = float(e) if np.ndim(e) == 0 else e + edot_scalar = float(edot) if np.ndim(edot) == 0 else edot + + if np.ndim(e_scalar) == 0: + if self._use_4d_obs: + return np.array([e_scalar, edot_scalar, 0.0, 0.0], dtype=np.float32) + return np.array([e_scalar, edot_scalar], dtype=np.float32) + else: + base = np.concatenate([ + np.atleast_1d(e_scalar), + np.atleast_1d(edot_scalar) + ]).astype(np.float32) + if self._use_4d_obs: + return np.concatenate([base, np.zeros(2, dtype=np.float32)]) + return base +``` + +- [ ] **Step 5: Run all RL surface tests** + +Run: `cd D:/OpenSMC/python && python -m pytest tests/test_rl_surface.py -v --no-header` +Expected: ALL PASSED + +- [ ] **Step 6: Commit** + +```bash +cd D:/OpenSMC/python && git add opensmc/rl/rl_surface.py tests/test_rl_surface.py +git commit -m "feat: add SAC loading fallback and 4D obs padding to RLDiscoveredSurface" +``` + +--- + +## Task 6: Benchmark — Tests + +**Files:** +- Create: `tests/test_benchmark.py` + +- [ ] **Step 1: Write test file** + +```python +"""Tests for benchmark module — RL vs classical controllers.""" + +import os +import numpy as np +import pytest + +pytest.importorskip("pandas") + + +class TestBenchmarkFixedLaw: + """Test Table 1 — fixed switching law simulations.""" + + def test_simulate_single_surface(self): + from opensmc.rl.benchmark import _simulate_fixed_law + from opensmc.surfaces import LinearSurface + from opensmc.plants import DoubleIntegrator + result = _simulate_fixed_law( + surface=LinearSurface(c=10), + plant=DoubleIntegrator(), + T=1.0, dt=0.01, + gains={"K": 5, "lam": 3, "phi": 0.05}, + ref=1.0, + ) + assert "ise" in result + assert "settling_time" in result + assert result["ise"] > 0 + assert np.isfinite(result["ise"]) + + 
def test_all_surfaces_run(self): + from opensmc.rl.benchmark import _get_classical_surfaces + surfaces = _get_classical_surfaces() + assert len(surfaces) == 11 + + +class TestBenchmarkMatchedController: + """Test Table 2 — best-matched controller simulations.""" + + def test_simulate_single_controller(self): + from opensmc.rl.benchmark import _simulate_matched_controller + from opensmc.plants import DoubleIntegrator + result = _simulate_matched_controller( + controller_name="ClassicalSMC", + plant=DoubleIntegrator(), + T=1.0, dt=0.01, ref=1.0, + ) + assert "ise" in result + assert np.isfinite(result["ise"]) + + def test_controller_factory_returns_17(self): + from opensmc.rl.benchmark import _get_controller_configs + from opensmc.plants import DoubleIntegrator + configs = _get_controller_configs(DoubleIntegrator()) + assert len(configs) == 17 + + +class TestBenchmarkFull: + """Test full benchmark pipeline (small scale).""" + + def test_benchmark_mini(self): + from opensmc.rl.benchmark import benchmark + results = benchmark( + plants=["double_integrator"], + trained_models_dir=None, # skip RL (no models) + algorithms=[], + disturbances=["none"], + T=1.0, dt=0.01, + include_matched=False, + ) + assert results.table1 is not None + assert len(results.table1) > 0 + + def test_benchmark_with_matched(self): + from opensmc.rl.benchmark import benchmark + results = benchmark( + plants=["double_integrator"], + trained_models_dir=None, + algorithms=[], + disturbances=["none"], + T=1.0, dt=0.01, + include_matched=True, + ) + assert results.table2 is not None + assert len(results.table2) > 0 + + +class TestBenchmarkOutput: + """Test output formats.""" + + def test_to_latex(self): + from opensmc.rl.benchmark import benchmark + results = benchmark( + plants=["double_integrator"], + trained_models_dir=None, algorithms=[], + disturbances=["none"], T=1.0, dt=0.01, + include_matched=False, + ) + latex = results.to_latex() + assert "tabular" in latex or "\\begin" in latex + + def 
test_to_json(self, tmp_path): + from opensmc.rl.benchmark import benchmark + results = benchmark( + plants=["double_integrator"], + trained_models_dir=None, algorithms=[], + disturbances=["none"], T=1.0, dt=0.01, + include_matched=False, + ) + path = str(tmp_path / "results.json") + results.to_json(path) + assert os.path.exists(path) + + def test_summary_string(self): + from opensmc.rl.benchmark import benchmark + results = benchmark( + plants=["double_integrator"], + trained_models_dir=None, algorithms=[], + disturbances=["none"], T=1.0, dt=0.01, + include_matched=False, + ) + s = results.summary() + assert isinstance(s, str) + assert len(s) > 0 +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd D:/OpenSMC/python && python -m pytest tests/test_benchmark.py -v --no-header 2>&1 | head -20` +Expected: FAIL — `ModuleNotFoundError: No module named 'opensmc.rl.benchmark'` + +- [ ] **Step 3: Commit test file** + +```bash +cd D:/OpenSMC/python && git add tests/test_benchmark.py +git commit -m "test: add benchmark tests (red phase)" +``` + +--- + +## Task 7: Benchmark — Implementation + +**Files:** +- Create: `opensmc/rl/benchmark.py` + +This is the most complex module (~350 LOC). It orchestrates simulations using OpenSMC's existing `Simulator` and `metrics` modules. + +- [ ] **Step 1: Write benchmark.py** + +```python +"""Benchmark — RL vs classical SMC controllers across plants. + +Runs two comparison tables: + Table 1: Fixed switching law (isolates surface quality) + Table 2: Best-matched controller (practical performance) +""" + +import json +import logging +import os +import warnings +from dataclasses import dataclass, field + +import numpy as np + +try: + import pandas as pd +except ImportError: + raise ImportError("pandas is required for benchmarking. 
Install with: pip install opensmc[rl]") + +from opensmc.simulator import Simulator +from opensmc import metrics as M +from opensmc.surfaces import ( + LinearSurface, TerminalSurface, NonsingularTerminalSurface, + FastTerminalSurface, IntegralSlidingSurface, IntegralTerminalSurface, + HierarchicalSurface, PIDSurface, GlobalSurface, + PredefinedTimeSurface, NonlinearDampingSurface, +) +from opensmc.controllers import ( + ClassicalSMC, AdaptiveSMC, DynamicSMC, ITSMC, NFTSMC, + FixedTimeSMC, FuzzySMC, DiscreteSMC, + CombiningHSMC, AggregatedHSMC, IncrementalHSMC, + TwistingSMC, QuasiContinuous2SMC, + NestedHOSMC, QuasiContinuousHOSMC, + PID, LQR, +) +from opensmc.plants import ( + DoubleIntegrator, InvertedPendulum, SinglePendulumCrane, + Quadrotor6DOF, PMSM, +) + +logger = logging.getLogger(__name__) + +PLANT_MAP = { + "double_integrator": DoubleIntegrator, + "inverted_pendulum": InvertedPendulum, + "crane": SinglePendulumCrane, + "quadrotor": Quadrotor6DOF, + "pmsm": PMSM, +} + +_REFS = { + "double_integrator": 1.0, + "inverted_pendulum": 0.0, + "crane": 0.5, + "quadrotor": 1.0, + "pmsm": 100.0, +} + + +def _get_classical_surfaces(): + """Return dict of {name: surface_instance} for the 11 classical surfaces.""" + return { + "Linear": LinearSurface(c=10), + "Terminal": TerminalSurface(beta=2.0, p=5, q=7), + "NonsingularTerminal": NonsingularTerminalSurface(beta=2.0, p=5, q=7), + "FastTerminal": FastTerminalSurface(alpha=5.0, beta=2.0, p=5, q=7), + "IntegralSliding": IntegralSlidingSurface(c=10.0), + "IntegralTerminal": IntegralTerminalSurface(c1=5.0, c2=2.0, p=5, q=7), + "Hierarchical": HierarchicalSurface(), + "PID": PIDSurface(alpha=5.0, beta=3.0, gamma=1.0), + "Global": GlobalSurface(c=10.0, alpha=2.0), + "PredefinedTime": PredefinedTimeSurface(Tc=2.0), + "NonlinearDamping": NonlinearDampingSurface(c=10.0), + } + + +def _get_controller_configs(plant): + """Return dict of {name: controller_instance} for all 17 controllers. 
+ + Uses composition: each controller paired with its best-matched surface. + Standalone controllers (HOSMC, PID, LQR) don't need a surface. + """ + configs = { + "ClassicalSMC": ClassicalSMC(surface=LinearSurface(c=10)), + "AdaptiveSMC": AdaptiveSMC(surface=NonlinearDampingSurface(c=10)), + "DynamicSMC": DynamicSMC(surface=LinearSurface(c=10)), + "ITSMC": ITSMC(surface=IntegralTerminalSurface(c1=5, c2=2, p=5, q=7)), + "NFTSMC": NFTSMC(surface=NonsingularTerminalSurface(beta=2, p=5, q=7)), + "FixedTimeSMC": FixedTimeSMC(surface=PredefinedTimeSurface(Tc=2.0)), + "FuzzySMC": FuzzySMC(surface=LinearSurface(c=10)), + "DiscreteSMC": DiscreteSMC(surface=LinearSurface(c=10)), + "CombiningHSMC": CombiningHSMC(surface=HierarchicalSurface()), + "AggregatedHSMC": AggregatedHSMC(surface=HierarchicalSurface()), + "IncrementalHSMC": IncrementalHSMC(surface=HierarchicalSurface()), + "TwistingSMC": TwistingSMC(surface=LinearSurface(c=10)), + "QuasiContinuous2SMC": QuasiContinuous2SMC(surface=LinearSurface(c=10)), + "NestedHOSMC": NestedHOSMC(order=2), + "QuasiContinuousHOSMC": QuasiContinuousHOSMC(order=2), + "PID": PID(Kp=10, Kd=5, Ki=0.5), + } + # LQR needs plant-specific A, B matrices + try: + n = plant.n_states + A = np.zeros((n, n)) + B = np.zeros((n, 1)) + if n >= 2: + A[0, 1] = 1.0 + B[1, 0] = 1.0 + configs["LQR"] = LQR(A=A, B=B) + except Exception: + configs["LQR"] = PID(Kp=10, Kd=5, Ki=0.5) # fallback + + return configs + + +def _disturbance_fn(dist_type, amplitude=1.0): + """Return a disturbance function d(t).""" + if dist_type == "none": + return lambda t: 0.0 + elif dist_type == "constant": + return lambda t: amplitude + elif dist_type == "sinusoidal": + return lambda t: amplitude * np.sin(2 * np.pi * t) + elif dist_type == "step": + return lambda t: amplitude if t > 1.0 else 0.0 + return lambda t: 0.0 + + +def _extract_metrics(result): + """Extract benchmark metrics from a simulation result dict.""" + try: + return { + "ise": float(M.ise(result)), + "itae": 
float(M.itae(result)), + "settling_time": float(M.settling_time(result)), + "overshoot": float(M.overshoot(result)), + "chattering_index": float(M.chattering_index(result)), + "ss_error": float(M.ss_error(result)), + } + except Exception as e: + logger.warning(f"Metrics extraction failed: {e}") + return { + "ise": float("nan"), "itae": float("nan"), + "settling_time": float("nan"), "overshoot": float("nan"), + "chattering_index": float("nan"), "ss_error": float("nan"), + } + + +def _simulate_fixed_law(surface, plant, T, dt, gains, ref, dist_fn=None): + """Run one simulation with exact fixed switching law. + + u = -K * sat(s/phi) - lam * s + + This matches the SurfaceDiscoveryEnv's internal control law exactly, + ensuring fair comparison between RL and classical surfaces. + We do NOT use ClassicalSMC here because its compute() uses a different + reaching law (k * sign(s)), which would not isolate surface quality. + """ + K, lam, phi = gains["K"], gains["lam"], gains["phi"] + n_steps = int(T / dt) + x = plant.get_default_x0().copy() + t_arr, e_arr, u_arr, s_arr = [], [], [], [] + t = 0.0 + + for i in range(n_steps): + # Error (use first state as position, second as velocity for simple plants) + e = float(x[0]) - ref + edot = float(x[1]) if len(x) > 1 else 0.0 + + # Surface + surface.reset() if i == 0 else None + s = surface.compute(e, edot, t) + + # Fixed switching law (matches training env exactly) + sat_s = np.clip(s / phi, -1.0, 1.0) + u = -K * sat_s - lam * s + + # Disturbance + d = dist_fn(t) if dist_fn else 0.0 + + # RK4 integration + f = lambda t_, x_: np.array(plant.dynamics(t_, x_, u, d), dtype=np.float64) + k1 = f(t, x) + k2 = f(t + dt/2, x + dt/2 * k1) + k3 = f(t + dt/2, x + dt/2 * k2) + k4 = f(t + dt, x + dt * k3) + x = x + (dt / 6) * (k1 + 2*k2 + 2*k3 + k4) + t += dt + + t_arr.append(t) + e_arr.append(e) + u_arr.append(u) + s_arr.append(s) + + # Build result dict compatible with metrics module + result = { + "t": np.array(t_arr), + "e": 
np.array(e_arr).reshape(-1, 1), + "u": np.array(u_arr).reshape(-1, 1), + "s": np.array(s_arr), + "x": np.zeros((len(t_arr), len(x))), # placeholder + "xref": np.full((len(t_arr), 1), ref), + } + return _extract_metrics(result) + + +def _simulate_matched_controller(controller_name, plant, T, dt, ref, dist_fn=None, + controller_configs=None): + """Run one simulation with a specific controller.""" + if controller_configs is None: + controller_configs = _get_controller_configs(plant) + ctrl = controller_configs[controller_name] + ctrl.reset() + sim = Simulator(dt=dt, T=T) + ref_fn = lambda t: np.array([ref]) + result = sim.run(ctrl, plant, ref_fn=ref_fn, dist_fn=dist_fn) + return _extract_metrics(result) + + +def _load_rl_surface(models_dir, algo, plant, disturbance): + """Load a trained RL surface from disk. Returns None if not found.""" + if models_dir is None: + return None + model_path = os.path.join(models_dir, f"{algo}_{plant}_{disturbance}.zip") + if not os.path.exists(model_path): + logger.warning(f"Model not found: {model_path}, skipping RL entry") + return None + from opensmc.rl import RLDiscoveredSurface + return RLDiscoveredSurface(model_path, use_4d_obs=True) + + +@dataclass +class BenchmarkResults: + """Results from the full benchmark suite.""" + table1: pd.DataFrame + table2: pd.DataFrame | None + fingerprints: dict = field(default_factory=dict) + rankings: dict = field(default_factory=dict) + + def to_latex(self): + """Export tables as LaTeX.""" + parts = [] + if self.table1 is not None: + parts.append("% Table 1: Fixed switching law\n") + parts.append(self.table1.to_latex(float_format="%.4f")) + if self.table2 is not None: + parts.append("\n% Table 2: Best-matched controller\n") + parts.append(self.table2.to_latex(float_format="%.4f")) + return "\n".join(parts) + + def to_json(self, path): + """Export results as JSON.""" + data = {} + if self.table1 is not None: + data["table1"] = self.table1.to_dict(orient="index") + if self.table2 is not None: + 
data["table2"] = self.table2.to_dict(orient="index") + data["fingerprints"] = {str(k): v for k, v in self.fingerprints.items()} + with open(path, "w") as f: + json.dump(data, f, indent=2, default=str) + + def summary(self): + """Console-friendly summary.""" + lines = [] + if self.table1 is not None: + lines.append(f"Table 1: {len(self.table1)} rows (fixed law)") + if "ise" in self.table1.columns: + best = self.table1["ise"].idxmin() + lines.append(f" Best ISE: {best} = {self.table1.loc[best, 'ise']:.4f}") + if self.table2 is not None: + lines.append(f"Table 2: {len(self.table2)} rows (matched controller)") + if "ise" in self.table2.columns: + best = self.table2["ise"].idxmin() + lines.append(f" Best ISE: {best} = {self.table2.loc[best, 'ise']:.4f}") + return "\n".join(lines) if lines else "No results" + + +def benchmark( + plants=None, + trained_models_dir=None, + algorithms=None, + disturbances=None, + T=5.0, + dt=0.01, + fixed_gains=None, + include_matched=True, + output_dir=None, +): + """Run the full benchmark: RL vs classical controllers. + + Parameters + ---------- + plants : list of str + Plant names. Default: all 5. + trained_models_dir : str or None + Directory with trained RL models. None = skip RL entries. + algorithms : list of str + RL algorithms to include. Default: ["PPO", "SAC"]. + disturbances : list of str + Disturbance types. Default: all 4. + T, dt : float + Simulation time and step. + fixed_gains : dict + Control gains for Table 1. Default: K=5, lam=3, phi=0.05. + include_matched : bool + Whether to compute Table 2. + output_dir : str or None + Save results here if provided. 
+ + Returns + ------- + BenchmarkResults + """ + if plants is None: + plants = ["double_integrator", "inverted_pendulum", "crane", "quadrotor", "pmsm"] + if algorithms is None: + algorithms = ["PPO", "SAC"] + if disturbances is None: + disturbances = ["none", "constant", "sinusoidal", "step"] + if fixed_gains is None: + fixed_gains = {"K": 5, "lam": 3, "phi": 0.05} + + surfaces = _get_classical_surfaces() + + # === Table 1: Fixed switching law === + rows1 = [] + for plant_name in plants: + plant = PLANT_MAP[plant_name]() + ref = _REFS[plant_name] + for dist in disturbances: + dist_fn = _disturbance_fn(dist) + # Classical surfaces + for surf_name, surface in surfaces.items(): + surface.reset() + m = _simulate_fixed_law(surface, plant, T, dt, fixed_gains, ref, dist_fn) + m.update({"surface": surf_name, "plant": plant_name, "disturbance": dist}) + rows1.append(m) + # RL surfaces + for algo in algorithms: + rl_surf = _load_rl_surface(trained_models_dir, algo, plant_name, dist) + if rl_surf is not None: + m = _simulate_fixed_law(rl_surf, plant, T, dt, fixed_gains, ref, dist_fn) + m.update({"surface": f"RL-{algo}", "plant": plant_name, "disturbance": dist}) + rows1.append(m) + + table1 = pd.DataFrame(rows1) + if len(table1) > 0: + table1 = table1.set_index(["surface", "plant", "disturbance"]) + + # === Table 2: Best-matched controller === + table2 = None + if include_matched: + rows2 = [] + for plant_name in plants: + plant = PLANT_MAP[plant_name]() + ref = _REFS[plant_name] + configs = _get_controller_configs(plant) + for dist in disturbances: + dist_fn = _disturbance_fn(dist) + for ctrl_name in configs: + m = _simulate_matched_controller( + ctrl_name, plant, T, dt, ref, dist_fn, configs + ) + m.update({"controller": ctrl_name, "plant": plant_name, "disturbance": dist}) + rows2.append(m) + table2 = pd.DataFrame(rows2) + if len(table2) > 0: + table2 = table2.set_index(["controller", "plant", "disturbance"]) + + results = BenchmarkResults(table1=table1, table2=table2) + + 
if output_dir: + os.makedirs(output_dir, exist_ok=True) + results.to_json(os.path.join(output_dir, "benchmark_results.json")) + + return results +``` + +- [ ] **Step 2: Run tests** + +Run: `cd D:/OpenSMC/python && python -m pytest tests/test_benchmark.py -v --no-header -x` +Expected: 10 PASSED + +- [ ] **Step 3: Commit** + +```bash +cd D:/OpenSMC/python && git add opensmc/rl/benchmark.py +git commit -m "feat: add benchmark suite — RL vs 17 controllers x 5 plants" +``` + +--- + +## Task 8: Visualize — Tests + Implementation + +**Files:** +- Create: `tests/test_visualize.py` +- Create: `opensmc/rl/visualize.py` + +- [ ] **Step 1: Write test file** + +```python +"""Tests for visualization module.""" + +import numpy as np +import pytest + +pytest.importorskip("matplotlib") + + +class TestVisualize: + """Test all visualization functions return Figure objects.""" + + def test_training_curve(self): + import matplotlib + matplotlib.use("Agg") + from opensmc.rl.visualize import training_curve + fig = training_curve( + reward_history=[1.0, 2.0, 3.0], + ise_history=[0.5, 0.3, 0.1], + eval_freq=10_000, + ) + assert fig is not None + import matplotlib.pyplot as plt + assert isinstance(fig, plt.Figure) + plt.close(fig) + + def test_surface_heatmap(self): + import matplotlib + matplotlib.use("Agg") + from opensmc.rl.visualize import surface_heatmap + # Use a callable surface instead of model path + sigma_fn = lambda e, ed: e + 2 * ed + fig = surface_heatmap(sigma_fn=sigma_fn, e_range=(-3, 3), edot_range=(-3, 3)) + assert fig is not None + import matplotlib.pyplot as plt + plt.close(fig) + + def test_radar_chart(self): + import matplotlib + matplotlib.use("Agg") + from opensmc.rl.visualize import radar_chart + scores = {"Linear": 0.8, "Terminal": 0.6, "FastTerminal": 0.9, + "NonsingularTerminal": 0.7, "IntegralSliding": 0.3, + "IntegralTerminal": 0.5, "PID": 0.4, "Global": 0.2, + "PredefinedTime": 0.1, "NonlinearDamping": 0.3} + fig = radar_chart(scores) + assert fig is not None + 
import matplotlib.pyplot as plt + plt.close(fig) + + def test_benchmark_bars(self): + import matplotlib + matplotlib.use("Agg") + import pandas as pd + from opensmc.rl.visualize import benchmark_bars + data = pd.DataFrame({ + "ise": [0.1, 0.2, 0.3], + }, index=["Linear", "Terminal", "RL"]) + fig = benchmark_bars(data, metric="ise") + assert fig is not None + import matplotlib.pyplot as plt + plt.close(fig) + + def test_save_path(self, tmp_path): + import matplotlib + matplotlib.use("Agg") + from opensmc.rl.visualize import training_curve + path = str(tmp_path / "test.png") + fig = training_curve( + reward_history=[1.0, 2.0], ise_history=[0.5, 0.3], + eval_freq=10_000, save_path=path, + ) + import os + assert os.path.exists(path) + import matplotlib.pyplot as plt + plt.close(fig) + + def test_contour_overlay(self): + import matplotlib + matplotlib.use("Agg") + from opensmc.rl.visualize import contour_overlay + from opensmc.surfaces import LinearSurface, FastTerminalSurface + fig = contour_overlay( + surfaces={"Linear": LinearSurface(c=10), "FastTerminal": FastTerminalSurface()}, + e_range=(-3, 3), edot_range=(-3, 3), + ) + assert fig is not None + import matplotlib.pyplot as plt + plt.close(fig) + + def test_cross_plant_radar(self): + import matplotlib + matplotlib.use("Agg") + from opensmc.rl.visualize import cross_plant_radar + fp = { + "double_integrator": {"Linear": 0.8, "Terminal": 0.6}, + "crane": {"Linear": 0.5, "Terminal": 0.9}, + } + fig = cross_plant_radar(fp) + assert fig is not None + import matplotlib.pyplot as plt + plt.close(fig) +``` + +- [ ] **Step 2: Write visualize.py** + +```python +"""Visualization — publication-quality plots for RL surface analysis. + +All functions return matplotlib.Figure. Optional save_path for PNG/PDF export. +""" + +import numpy as np + +try: + import matplotlib + import matplotlib.pyplot as plt +except ImportError: + raise ImportError("matplotlib is required. 
Install with: pip install opensmc[viz]") + + +def _save_and_return(fig, save_path): + """Save figure if path provided, return figure.""" + if save_path: + fig.savefig(save_path, dpi=300, bbox_inches="tight") + return fig + + +def training_curve(reward_history, ise_history, eval_freq=10_000, save_path=None): + """Plot reward and ISE over training steps. + + Parameters + ---------- + reward_history : list of float + ise_history : list of float + eval_freq : int + save_path : str or None + """ + steps = [eval_freq * (i + 1) for i in range(len(reward_history))] + + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) + + ax1.plot(steps, reward_history, "b-", linewidth=1.5) + ax1.set_xlabel("Training steps") + ax1.set_ylabel("Cumulative reward") + ax1.set_title("Reward") + ax1.grid(True, alpha=0.3) + + ax2.plot(steps, ise_history, "r-", linewidth=1.5) + ax2.set_xlabel("Training steps") + ax2.set_ylabel("ISE") + ax2.set_title("Integrated Squared Error") + ax2.grid(True, alpha=0.3) + + fig.tight_layout() + return _save_and_return(fig, save_path) + + +def surface_heatmap(sigma_fn=None, model_path=None, e_range=(-3, 3), + edot_range=(-3, 3), resolution=100, save_path=None): + """Plot sigma(e, edot) heatmap. + + Parameters + ---------- + sigma_fn : callable(e, edot) -> float + Surface function. Used directly if provided. + model_path : str + Path to SB3 model. Loaded if sigma_fn is None. 
+ """ + if sigma_fn is None and model_path is not None: + from opensmc.rl import RLDiscoveredSurface + surface = RLDiscoveredSurface(model_path, use_4d_obs=True) + sigma_fn = lambda e, ed: surface.compute(e, ed) + + e = np.linspace(*e_range, resolution) + ed = np.linspace(*edot_range, resolution) + E, ED = np.meshgrid(e, ed) + S = np.vectorize(sigma_fn)(E, ED) + + fig, ax = plt.subplots(figsize=(7, 5)) + im = ax.pcolormesh(E, ED, S, cmap="RdBu_r", shading="auto") + ax.contour(E, ED, S, levels=[0], colors="k", linewidths=2) + fig.colorbar(im, ax=ax, label=r"$\sigma(e, \dot{e})$") + ax.set_xlabel(r"$e$") + ax.set_ylabel(r"$\dot{e}$") + ax.set_title("Discovered Sliding Surface") + return _save_and_return(fig, save_path) + + +def contour_overlay(surfaces, e_range=(-3, 3), edot_range=(-3, 3), + resolution=100, save_path=None): + """Overlay s=0 contours of multiple surfaces. + + Parameters + ---------- + surfaces : dict of {name: SlidingSurface} + """ + e = np.linspace(*e_range, resolution) + ed = np.linspace(*edot_range, resolution) + E, ED = np.meshgrid(e, ed) + + fig, ax = plt.subplots(figsize=(7, 5)) + colors = plt.cm.tab10(np.linspace(0, 1, len(surfaces))) + + for (name, surface), color in zip(surfaces.items(), colors): + S = np.vectorize(lambda ei, edi: surface.compute(ei, edi))(E, ED) + ax.contour(E, ED, S, levels=[0], colors=[color], linewidths=1.5) + ax.plot([], [], color=color, label=name, linewidth=1.5) + + ax.set_xlabel(r"$e$") + ax.set_ylabel(r"$\dot{e}$") + ax.set_title("Sliding Manifold Comparison (s=0)") + ax.legend(loc="upper right", fontsize=8) + ax.grid(True, alpha=0.3) + return _save_and_return(fig, save_path) + + +def radar_chart(scores, title="Fingerprint", save_path=None): + """Radar chart of similarity scores to known surfaces. 
+ + Parameters + ---------- + scores : dict of {surface_name: float} + """ + labels = list(scores.keys()) + values = list(scores.values()) + N = len(labels) + + angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist() + values_closed = values + [values[0]] + angles_closed = angles + [angles[0]] + + fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={"projection": "polar"}) + ax.plot(angles_closed, values_closed, "b-", linewidth=2) + ax.fill(angles_closed, values_closed, alpha=0.25, color="b") + ax.set_xticks(angles) + ax.set_xticklabels(labels, fontsize=8) + ax.set_ylim(0, 1) + ax.set_title(title, pad=20) + return _save_and_return(fig, save_path) + + +def cross_plant_radar(fingerprints_by_plant, save_path=None): + """Side-by-side radar charts per plant. + + Parameters + ---------- + fingerprints_by_plant : dict of {plant_name: {surface_name: score}} + """ + n = len(fingerprints_by_plant) + fig, axes = plt.subplots(1, n, figsize=(5 * n, 5), + subplot_kw={"projection": "polar"}) + if n == 1: + axes = [axes] + + for ax, (plant, scores) in zip(axes, fingerprints_by_plant.items()): + labels = list(scores.keys()) + values = list(scores.values()) + N = len(labels) + angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist() + values_closed = values + [values[0]] + angles_closed = angles + [angles[0]] + + ax.plot(angles_closed, values_closed, "b-", linewidth=2) + ax.fill(angles_closed, values_closed, alpha=0.25, color="b") + ax.set_xticks(angles) + ax.set_xticklabels(labels, fontsize=7) + ax.set_ylim(0, 1) + ax.set_title(plant, pad=15) + + fig.tight_layout() + return _save_and_return(fig, save_path) + + +def benchmark_bars(data, metric="ise", title=None, save_path=None): + """Bar chart comparing surfaces/controllers on a metric. + + Parameters + ---------- + data : pd.DataFrame + Index = surface/controller names, must contain `metric` column. + metric : str + Column name to plot. 
+ """ + fig, ax = plt.subplots(figsize=(10, 5)) + + names = data.index.tolist() + values = data[metric].values + + colors = ["#e74c3c" if "RL" in str(n) else "#3498db" for n in names] + bars = ax.barh(names, values, color=colors) + ax.set_xlabel(metric.upper()) + ax.set_title(title or f"Benchmark: {metric.upper()}") + ax.invert_yaxis() + fig.tight_layout() + return _save_and_return(fig, save_path) + + +def time_domain(results_dict, top_n=5, save_path=None): + """Time-domain comparison of top-N surfaces. + + Parameters + ---------- + results_dict : dict of {name: simulation_result_dict} + Each value has keys: t, e, u, s + top_n : int + Number of surfaces to show. + """ + fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 8), sharex=True) + colors = plt.cm.tab10(np.linspace(0, 1, min(top_n, len(results_dict)))) + + for (name, res), color in zip(list(results_dict.items())[:top_n], colors): + t = res["t"] + ax1.plot(t, res["e"][:, 0] if res["e"].ndim > 1 else res["e"], + color=color, label=name, linewidth=1.2) + ax2.plot(t, res["s"] if "s" in res else np.zeros_like(t), + color=color, linewidth=1.2) + ax3.plot(t, res["u"][:, 0] if res["u"].ndim > 1 else res["u"], + color=color, linewidth=1.2) + + ax1.set_ylabel("Error e(t)") + ax1.legend(fontsize=7, ncol=2) + ax1.grid(True, alpha=0.3) + ax2.set_ylabel("Sliding var s(t)") + ax2.grid(True, alpha=0.3) + ax3.set_ylabel("Control u(t)") + ax3.set_xlabel("Time (s)") + ax3.grid(True, alpha=0.3) + fig.tight_layout() + return _save_and_return(fig, save_path) +``` + +- [ ] **Step 3: Run tests** + +Run: `cd D:/OpenSMC/python && python -m pytest tests/test_visualize.py -v --no-header` +Expected: 7 PASSED + +- [ ] **Step 4: Commit** + +```bash +cd D:/OpenSMC/python && git add opensmc/rl/visualize.py tests/test_visualize.py +git commit -m "feat: add RL visualization module — heatmaps, radar, contours, bars" +``` + +--- + +## Task 9: Integration — __init__.py, pyproject.toml, examples + +**Files:** +- Modify: 
`opensmc/rl/__init__.py` +- Modify: `pyproject.toml:18` +- Create: `examples/train_and_fingerprint.py` +- Create: `examples/full_benchmark.py` + +- [ ] **Step 1: Update opensmc/rl/__init__.py** + +Replace entire file: + +```python +"""OpenSMC RL — Reinforcement learning integration. + +Provides: +- RLDiscoveredSurface: wraps a trained SB3 policy as a sliding surface +- fingerprinting: decompose any surface against 10 known SMC surface types +- SurfaceDiscoveryEnv: Gymnasium env for RL surface discovery +- train_surface / train_all_surfaces: PPO/SAC training pipeline +- benchmark: RL vs 17 classical controllers comparison +- visualize: publication-quality plots +""" + +from .rl_surface import RLDiscoveredSurface +from . import fingerprinting + +__all__ = [ + 'RLDiscoveredSurface', + 'fingerprinting', + 'SurfaceDiscoveryEnv', + 'train_surface', 'train_all_surfaces', 'TrainingResult', + 'benchmark', 'BenchmarkResults', + 'visualize', +] + + +def __getattr__(name): + """Lazy imports for optional heavy dependencies.""" + if name == "SurfaceDiscoveryEnv": + from .discovery_env import SurfaceDiscoveryEnv + return SurfaceDiscoveryEnv + if name in ("train_surface", "train_all_surfaces", "TrainingResult"): + from . import trainer + return getattr(trainer, name) + if name in ("benchmark", "BenchmarkResults"): + from . import benchmark as bm + return getattr(bm, name) + if name == "visualize": + from . import visualize + return visualize + raise AttributeError(f"module 'opensmc.rl' has no attribute {name!r}") +``` + +- [ ] **Step 2: Update pyproject.toml — add pandas to rl extras** + +Change line 18 from: +``` +rl = ["gymnasium>=0.29", "stable-baselines3>=2.0", "torch>=2.0"] +``` +to: +``` +rl = ["gymnasium>=0.29", "stable-baselines3>=2.0", "torch>=2.0", "pandas>=1.5"] +``` + +- [ ] **Step 3: Write examples/train_and_fingerprint.py** + +```python +"""Example: Train PPO on DoubleIntegrator, fingerprint, plot radar chart. 
+ +Usage: + cd D:/OpenSMC/python + python examples/train_and_fingerprint.py +""" + +import numpy as np +from opensmc.rl import train_surface, RLDiscoveredSurface +from opensmc.rl import fingerprinting, visualize + +# 1. Train PPO (50K steps for demo — use 500K for paper) +print("Training PPO on DoubleIntegrator...") +result = train_surface( + plant="double_integrator", + algorithm="PPO", + disturbance="sinusoidal", + total_timesteps=50_000, + n_envs=4, + output_dir="demo_models", + seed=42, +) +print(f"Training complete in {result.training_time:.1f}s") +print(f"Final reward: {result.final_reward:.2f}, ISE: {result.final_ise:.4f}") + +# 2. Load as RLDiscoveredSurface +surface = RLDiscoveredSurface(result.model_path, use_4d_obs=True) + +# 3. Fingerprint against 10 known surfaces +fp = fingerprinting.fingerprint(surface) +print("\nFingerprint scores:") +for name, score in sorted(fp.items(), key=lambda x: -x[1]): + print(f" {name:25s} {score:.3f}") + +# 4. Visualize +visualize.training_curve( + result.reward_history, result.ise_history, + save_path="demo_models/training_curve.png" +) +visualize.surface_heatmap( + sigma_fn=lambda e, ed: surface.compute(e, ed), + save_path="demo_models/surface_heatmap.png" +) +visualize.radar_chart(fp, save_path="demo_models/radar.png") +print("\nFigures saved to demo_models/") +``` + +- [ ] **Step 4: Write examples/full_benchmark.py** + +```python +"""Example: Run full benchmark — RL vs all controllers. 
+ +Usage: + cd D:/OpenSMC/python + python examples/full_benchmark.py +""" + +from opensmc.rl import benchmark, visualize + +# Run benchmark (classical only — add trained_models_dir for RL) +print("Running benchmark (classical controllers only)...") +results = benchmark( + plants=["double_integrator", "inverted_pendulum"], + trained_models_dir=None, # Set to "trained_models" after training + algorithms=[], + disturbances=["none", "sinusoidal"], + T=5.0, + dt=0.01, + include_matched=True, + output_dir="benchmark_results", +) + +print("\n" + results.summary()) +print("\nLaTeX (first 20 lines):") +print("\n".join(results.to_latex().split("\n")[:20])) +print(f"\nResults saved to benchmark_results/") +``` + +- [ ] **Step 5: Run full test suite** + +Run: `cd D:/OpenSMC/python && python -m pytest tests/ -v --no-header -q` +Expected: All existing tests + all new tests PASS + +- [ ] **Step 6: Commit** + +```bash +cd D:/OpenSMC/python && git add opensmc/rl/__init__.py pyproject.toml examples/train_and_fingerprint.py examples/full_benchmark.py +git commit -m "feat: complete Phase D integration — public API, deps, examples" +``` + +--- + +## Task 10: Smoke Test — End-to-End Validation + +**Files:** None (validation only) + +- [ ] **Step 1: Run the full test suite** + +Run: `cd D:/OpenSMC/python && python -m pytest tests/ -v --tb=short` +Expected: ALL PASS (existing 112 + ~42 new = ~154 tests) + +- [ ] **Step 2: Run the train_and_fingerprint example** + +Run: `cd D:/OpenSMC/python && python examples/train_and_fingerprint.py` +Expected: Completes without error, produces 3 PNG files in demo_models/ + +- [ ] **Step 3: Run the benchmark example** + +Run: `cd D:/OpenSMC/python && python examples/full_benchmark.py` +Expected: Prints summary + LaTeX snippet, saves JSON to benchmark_results/ + +- [ ] **Step 4: Verify import works cleanly** + +Run: `cd D:/OpenSMC/python && python -c "from opensmc.rl import RLDiscoveredSurface, fingerprinting, SurfaceDiscoveryEnv, train_surface, 
benchmark, visualize; print('All imports OK')"` +Expected: "All imports OK" + +- [ ] **Step 5: Final commit** + +```bash +cd D:/OpenSMC/python && git add -A && git status +# Only commit if there are cleanup changes +git commit -m "chore: Phase D smoke test passed — all tests green" +``` From 43ccf25b500e132081e4fc8fa46c5cdc155a6150 Mon Sep 17 00:00:00 2001 From: a2z Date: Thu, 19 Mar 2026 11:27:55 +0300 Subject: [PATCH 03/10] feat: add SurfaceDiscoveryEnv for RL surface discovery TDD: 20 tests written first (all failed), then implementation created. All 20 tests pass. No regressions in existing 193 tests. Gymnasium environment where RL agent outputs sigma (sliding variable) directly, and a fixed switching control law converts to control input. Supports 5 plants (double_integrator, inverted_pendulum, crane, quadrotor, pmsm), 4 disturbance types, RK4 integration, and configurable control gains. Co-Authored-By: Claude Opus 4.6 (1M context) --- python/opensmc/rl/discovery_env.py | 264 +++++++++++++++++++++++++++++ python/tests/test_discovery_env.py | 157 +++++++++++++++++ 2 files changed, 421 insertions(+) create mode 100644 python/opensmc/rl/discovery_env.py create mode 100644 python/tests/test_discovery_env.py diff --git a/python/opensmc/rl/discovery_env.py b/python/opensmc/rl/discovery_env.py new file mode 100644 index 0000000..fcf6004 --- /dev/null +++ b/python/opensmc/rl/discovery_env.py @@ -0,0 +1,264 @@ +"""SurfaceDiscoveryEnv — Gymnasium environment for RL surface discovery. + +The RL agent outputs the sliding variable sigma directly. A fixed switching +control law converts sigma to control input u. This isolates surface quality +from controller design, enabling fair comparison with classical surfaces. + +Ported from autosmc/envs/discovery_env.py, adapted to OpenSMC Plant classes. +""" + +import numpy as np + +try: + import gymnasium as gym + from gymnasium import spaces +except ImportError: + raise ImportError( + "gymnasium is required for SurfaceDiscoveryEnv. 
" + "Install with: pip install opensmc[rl]" + ) + +from opensmc.plants import ( + DoubleIntegrator, InvertedPendulum, SinglePendulumCrane, + Quadrotor6DOF, PMSM, +) + +PLANT_REGISTRY = { + "double_integrator": DoubleIntegrator, + "inverted_pendulum": InvertedPendulum, + "crane": SinglePendulumCrane, + "quadrotor": Quadrotor6DOF, + "pmsm": PMSM, +} + +_ERROR_EXTRACTORS = { + "double_integrator": lambda x, ref: (x[0] - ref, x[1]), + "inverted_pendulum": lambda x, ref: (x[2], x[3]), + "crane": lambda x, ref: (x[0] - ref, x[1]), + "quadrotor": lambda x, ref: (x[2] - ref, x[8]), + "pmsm": lambda x, ref: (x[2] - ref, None), +} + +_RESET_RANGES = { + "double_integrator": {"pos": (-2, 2), "vel": (-1, 1)}, + "inverted_pendulum": {"pos": (-0.5, 0.5), "vel": (-0.3, 0.3)}, + "crane": {"pos": (-0.5, 0.5), "vel": (-0.3, 0.3)}, + "quadrotor": {"pos": (-1, 1), "vel": (-0.5, 0.5)}, + "pmsm": {"pos": (-5, 5), "vel": (-1, 1)}, +} + +_DEFAULT_REFS = { + "double_integrator": 1.0, + "inverted_pendulum": 0.0, + "crane": 0.5, + "quadrotor": 1.0, + "pmsm": 100.0, +} + + +def _pmsm_edot(plant, state): + """Compute PMSM angular acceleration from dynamics.""" + i_q = state[1] + omega = state[2] + Te = 1.5 * plant.pp * plant.psi_f * i_q + return (Te - plant.B * omega - plant.TL) / plant.J + + +class SurfaceDiscoveryEnv(gym.Env): + """Gymnasium environment where RL agent discovers sliding surfaces.""" + + metadata = {"render_modes": []} + + def __init__( + self, + plant="double_integrator", + disturbance="none", + disturbance_amplitude=1.0, + dt=0.01, + max_steps=500, + sigma_max=20.0, + control_gains=None, + ref=None, + rk4_substeps=4, + ): + super().__init__() + + if isinstance(plant, str): + if plant not in PLANT_REGISTRY: + raise ValueError( + f"Unknown plant '{plant}'. 
" + f"Available: {list(PLANT_REGISTRY.keys())}" + ) + self.plant_name = plant + self.plant = PLANT_REGISTRY[plant]() + else: + self.plant = plant + self.plant_name = next( + (k for k, v in PLANT_REGISTRY.items() + if isinstance(plant, v)), + "custom" + ) + + self.disturbance_type = disturbance + self.disturbance_amplitude = disturbance_amplitude + self.dt = dt + self.max_steps = max_steps + self.sigma_max = sigma_max + self.rk4_substeps = rk4_substeps + self.sub_dt = dt / rk4_substeps + + gains = control_gains or {} + self.K = gains.get("K", 5.0) + self.lam = gains.get("lam", 3.0) + self.phi = gains.get("phi", 0.05) + + self.ref = ref if ref is not None else _DEFAULT_REFS.get(self.plant_name, 1.0) + + self._extract_error = _ERROR_EXTRACTORS.get(self.plant_name) + if self._extract_error is None: + raise ValueError(f"No error extractor for plant '{self.plant_name}'") + + obs_low = np.array([-10.0, -10.0, -50.0, 0.0], dtype=np.float32) + obs_high = np.array([10.0, 10.0, 50.0, 100.0], dtype=np.float32) + self.observation_space = spaces.Box(obs_low, obs_high, dtype=np.float32) + self.action_space = spaces.Box( + low=-1.0, high=1.0, shape=(1,), dtype=np.float32 + ) + + self.state = None + self.sigma_prev = 0.0 + self.u_prev = 0.0 + self.step_count = 0 + self.t = 0.0 + + def _get_error(self, state): + e, edot = self._extract_error(state, self.ref) + if edot is None: + edot = _pmsm_edot(self.plant, state) + return float(e), float(edot) + + def _get_disturbance(self, t): + amp = self.disturbance_amplitude + if self.disturbance_type == "none": + return 0.0 + elif self.disturbance_type == "constant": + return amp + elif self.disturbance_type == "sinusoidal": + return amp * np.sin(2 * np.pi * t) + elif self.disturbance_type == "step": + return amp if t > 1.0 else 0.0 + return 0.0 + + def _format_disturbance(self, d_scalar): + """Convert scalar disturbance to plant-specific format.""" + if self.plant_name == "quadrotor": + d = np.zeros(12) + d[8] = d_scalar + return d + return 
d_scalar + + def _rk4_step(self, t, x, u, d_scalar): + d = self._format_disturbance(d_scalar) + if self.plant_name == "quadrotor": + u_full = np.array([u, 0.0, 0.0, 0.0]) + elif self.plant_name == "pmsm": + u_full = np.array([0.0, u]) + else: + u_full = u + f = lambda t_, x_: np.array( + self.plant.dynamics(t_, x_, u_full, d), dtype=np.float64 + ) + dt = self.sub_dt + k1 = f(t, x) + k2 = f(t + dt / 2, x + dt / 2 * k1) + k3 = f(t + dt / 2, x + dt / 2 * k2) + k4 = f(t + dt, x + dt * k3) + return x + (dt / 6) * (k1 + 2 * k2 + 2 * k3 + k4) + + def reset(self, seed=None, options=None): + super().reset(seed=seed) + + ranges = _RESET_RANGES.get(self.plant_name, {"pos": (-1, 1), "vel": (-0.5, 0.5)}) + n = self.plant.n_states + x0 = np.zeros(n, dtype=np.float64) + + if self.plant_name == "double_integrator": + x0[0] = self.np_random.uniform(*ranges["pos"]) + x0[1] = self.np_random.uniform(*ranges["vel"]) + elif self.plant_name == "inverted_pendulum": + x0[2] = self.np_random.uniform(*ranges["pos"]) + x0[3] = self.np_random.uniform(*ranges["vel"]) + elif self.plant_name == "crane": + x0[0] = self.np_random.uniform(*ranges["pos"]) + x0[1] = self.np_random.uniform(*ranges["vel"]) + elif self.plant_name == "quadrotor": + x0[2] = self.np_random.uniform(*ranges["pos"]) + x0[8] = self.np_random.uniform(*ranges["vel"]) + elif self.plant_name == "pmsm": + x0[2] = self.ref + self.np_random.uniform(*ranges["pos"]) + x0[1] = self.np_random.uniform(*ranges["vel"]) + + self.state = x0 + self.sigma_prev = 0.0 + self.u_prev = 0.0 + self.step_count = 0 + self.t = 0.0 + + e, edot = self._get_error(self.state) + obs = self._build_obs(e, edot) + return obs, {"e": e, "edot": edot} + + def step(self, action): + sigma = float(action[0]) * self.sigma_max + + sat_s = np.clip(sigma / self.phi, -1.0, 1.0) + u = -self.K * sat_s - self.lam * sigma + du = u - self.u_prev + + d = self._get_disturbance(self.t) + + x = self.state.copy() + for _ in range(self.rk4_substeps): + x = self._rk4_step(self.t, x, 
u, d) + self.t += self.sub_dt + self.state = x + + e, edot = self._get_error(self.state) + + reward = -1.0 * e**2 - 0.01 * u**2 - 0.005 * du**2 + + if abs(e) < 0.02 and abs(edot) < 0.05: + reward += 5.0 * self.dt + + self.sigma_prev = sigma + self.u_prev = u + self.step_count += 1 + + truncated = abs(e) > 10.0 or abs(edot) > 20.0 + if truncated: + reward -= 50.0 + + terminated = False + if self.step_count >= self.max_steps: + truncated = True + + obs = self._build_obs(e, edot) + info = {"e": e, "edot": edot, "sigma": sigma, "u": u, "t": self.t} + return obs, float(reward), terminated, truncated, info + + def _build_obs(self, e, edot): + obs = np.array([ + e, edot, self.sigma_prev, abs(self.u_prev) + ], dtype=np.float32) + return np.clip(obs, self.observation_space.low, self.observation_space.high) + + +# Register with Gymnasium +try: + if "OpenSMC/SurfaceDiscovery-v0" not in gym.registry: + gym.register( + id="OpenSMC/SurfaceDiscovery-v0", + entry_point="opensmc.rl.discovery_env:SurfaceDiscoveryEnv", + ) +except Exception: + pass diff --git a/python/tests/test_discovery_env.py b/python/tests/test_discovery_env.py new file mode 100644 index 0000000..20d9539 --- /dev/null +++ b/python/tests/test_discovery_env.py @@ -0,0 +1,157 @@ +"""Tests for SurfaceDiscoveryEnv — Gymnasium environment for RL surface discovery.""" + +import numpy as np +import pytest + +gymnasium = pytest.importorskip("gymnasium") + + +class TestSurfaceDiscoveryEnvCreation: + """Test environment creation with different plants.""" + + @pytest.mark.parametrize("plant", [ + "double_integrator", "inverted_pendulum", "crane", "quadrotor", "pmsm" + ]) + def test_create_env(self, plant): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant=plant) + assert env.observation_space.shape == (4,) + assert env.action_space.shape == (1,) + env.close() + + def test_create_with_plant_instance(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + from 
opensmc.plants import DoubleIntegrator + env = SurfaceDiscoveryEnv(plant=DoubleIntegrator()) + assert env.observation_space.shape == (4,) + env.close() + + def test_invalid_plant_string(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + with pytest.raises(ValueError, match="Unknown plant"): + SurfaceDiscoveryEnv(plant="nonexistent") + + +class TestSurfaceDiscoveryEnvGymAPI: + """Test Gymnasium API compliance.""" + + def test_reset_returns_obs_info(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator") + obs, info = env.reset(seed=42) + assert obs.shape == (4,) + assert isinstance(info, dict) + env.close() + + def test_step_returns_five_tuple(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator") + env.reset(seed=42) + action = env.action_space.sample() + obs, reward, terminated, truncated, info = env.step(action) + assert obs.shape == (4,) + assert isinstance(reward, float) + assert isinstance(terminated, bool) + assert isinstance(truncated, bool) + assert isinstance(info, dict) + env.close() + + def test_episode_terminates(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator", max_steps=10) + env.reset(seed=42) + done = False + steps = 0 + while not done: + obs, reward, terminated, truncated, info = env.step(np.array([0.0])) + done = terminated or truncated + steps += 1 + assert steps <= 10 + env.close() + + def test_obs_within_bounds(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator") + obs, _ = env.reset(seed=42) + for _ in range(50): + obs, _, term, trunc, _ = env.step(env.action_space.sample()) + if term or trunc: + break + assert np.all(obs >= env.observation_space.low) + assert np.all(obs <= env.observation_space.high) + env.close() + + +class TestSurfaceDiscoveryEnvReward: + 
"""Test reward computation.""" + + def test_zero_action_gives_negative_reward(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator") + env.reset(seed=42) + _, reward, _, _, _ = env.step(np.array([0.0])) + assert reward < 0 + env.close() + + def test_convergence_bonus(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="double_integrator") + env.reset(seed=42) + env.state = np.array([env.ref + 0.001, 0.001]) + env.sigma_prev = 0.0 + env.u_prev = 0.0 + _, reward, _, _, _ = env.step(np.array([0.0])) + assert reward > -0.1 + env.close() + + +class TestSurfaceDiscoveryEnvDisturbances: + """Test disturbance types.""" + + @pytest.mark.parametrize("disturbance", ["none", "constant", "sinusoidal", "step"]) + def test_disturbance_type(self, disturbance): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv( + plant="double_integrator", disturbance=disturbance, + disturbance_amplitude=1.0 + ) + obs, _ = env.reset(seed=42) + obs2, _, _, _, _ = env.step(np.array([0.5])) + assert obs2.shape == (4,) + env.close() + + +class TestSurfaceDiscoveryEnvPMSM: + """Test PMSM-specific edot computation.""" + + def test_pmsm_edot_computed(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv(plant="pmsm") + obs, _ = env.reset(seed=42) + obs2, _, _, _, info = env.step(np.array([0.5])) + assert np.isfinite(obs2[1]) + env.close() + + +class TestSurfaceDiscoveryEnvControlLaw: + """Test internal control law.""" + + def test_control_gains(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = SurfaceDiscoveryEnv( + plant="double_integrator", + control_gains={"K": 10, "lam": 5, "phi": 0.1} + ) + assert env.K == 10 + assert env.lam == 5 + assert env.phi == 0.1 + env.close() + + def test_sigma_scaling(self): + from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + env = 
SurfaceDiscoveryEnv(plant="double_integrator", sigma_max=30.0) + env.reset(seed=42) + obs, _, _, _, info = env.step(np.array([1.0])) + assert "sigma" in info + assert abs(info["sigma"] - 30.0) < 1e-6 + env.close() From 524fba4234cdaca1d2664d8bf53d1de9f6ba4f9a Mon Sep 17 00:00:00 2001 From: a2z Date: Thu, 19 Mar 2026 11:31:30 +0300 Subject: [PATCH 04/10] fix: use plant.electromagnetic_torque() in PMSM edot computation Ensures consistency with plant dynamics for non-salient motors (Ld != Lq). Co-Authored-By: Claude Opus 4.6 (1M context) --- python/opensmc/rl/discovery_env.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/python/opensmc/rl/discovery_env.py b/python/opensmc/rl/discovery_env.py index fcf6004..028f161 100644 --- a/python/opensmc/rl/discovery_env.py +++ b/python/opensmc/rl/discovery_env.py @@ -57,10 +57,14 @@ def _pmsm_edot(plant, state): - """Compute PMSM angular acceleration from dynamics.""" - i_q = state[1] + """Compute PMSM angular acceleration from dynamics. + + Uses plant.electromagnetic_torque() for consistency with the plant's + own dynamics() method, including the reluctance torque term when Ld != Lq. + """ + i_d, i_q = state[0], state[1] omega = state[2] - Te = 1.5 * plant.pp * plant.psi_f * i_q + Te = plant.electromagnetic_torque(i_d, i_q) return (Te - plant.B * omega - plant.TL) / plant.J From 11b140a12d15935099558fa6861dbc695f9764df Mon Sep 17 00:00:00 2001 From: a2z Date: Thu, 19 Mar 2026 11:38:42 +0300 Subject: [PATCH 05/10] feat: add PPO/SAC surface training pipeline Implements train_surface() and train_all_surfaces() with VecNormalize, ISE evaluation callback, and best-model saving. 9/9 tests pass. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- python/opensmc/rl/trainer.py | 243 +++++++++++++++++++++++++++++++++++ python/tests/test_trainer.py | 115 +++++++++++++++++ 2 files changed, 358 insertions(+) create mode 100644 python/opensmc/rl/trainer.py create mode 100644 python/tests/test_trainer.py diff --git a/python/opensmc/rl/trainer.py b/python/opensmc/rl/trainer.py new file mode 100644 index 0000000..fa3bfcf --- /dev/null +++ b/python/opensmc/rl/trainer.py @@ -0,0 +1,243 @@ +"""RL Surface Trainer -- PPO/SAC training pipeline with VecNormalize.""" + +import os +import time +from dataclasses import dataclass, field + +import numpy as np + +try: + from stable_baselines3 import PPO, SAC + from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize + from stable_baselines3.common.callbacks import BaseCallback, EvalCallback + from stable_baselines3.common.monitor import Monitor +except ImportError: + raise ImportError( + "stable-baselines3 is required for training. " + "Install with: pip install opensmc[rl]" + ) + +from opensmc.rl.discovery_env import SurfaceDiscoveryEnv + + +@dataclass +class TrainingResult: + model_path: str + vecnorm_path: str + best_model_path: str | None + reward_history: list = field(default_factory=list) + ise_history: list = field(default_factory=list) + final_reward: float = 0.0 + final_ise: float = float("inf") + training_time: float = 0.0 + algorithm: str = "" + plant: str = "" + disturbance: str = "" + seed: int = 0 + + +class _ISECallback(BaseCallback): + """Periodically evaluates the policy on an unnormalized env, tracking ISE.""" + + def __init__(self, eval_env, check_freq=10_000, verbose=0): + super().__init__(verbose) + self.eval_env = eval_env + self.check_freq = check_freq + self.rewards: list[float] = [] + self.ises: list[float] = [] + + def _on_step(self): + if self.n_calls % self.check_freq == 0: + obs, _ = self.eval_env.reset(seed=42) + total_reward = 0.0 + errors_sq: list[float] = [] + for _ in 
range(self.eval_env.max_steps): + action, _ = self.model.predict(obs, deterministic=True) + obs, reward, terminated, truncated, info = self.eval_env.step(action) + total_reward += reward + errors_sq.append(info["e"] ** 2) + if terminated or truncated: + break + ise = sum(errors_sq) * self.eval_env.dt + self.rewards.append(total_reward) + self.ises.append(ise) + if self.verbose: + print( + f" Step {self.n_calls:>7d} | " + f"reward={total_reward:8.2f} | ISE={ise:.6f}" + ) + return True + + +def _make_env(plant, disturbance, env_kwargs): + def _init(): + kw = dict(env_kwargs) if env_kwargs else {} + return SurfaceDiscoveryEnv(plant=plant, disturbance=disturbance, **kw) + return _init + + +def train_surface( + plant="double_integrator", + algorithm="PPO", + disturbance="none", + total_timesteps=500_000, + n_envs=4, + net_arch=None, + seed=42, + output_dir="trained_models", + eval_freq=10_000, + save_best=True, + normalize_obs=True, + normalize_reward=True, + clip_obs=10.0, + env_kwargs=None, +): + if algorithm not in ("PPO", "SAC"): + raise ValueError(f"Unknown algorithm '{algorithm}'. 
Use 'PPO' or 'SAC'.") + + if net_arch is None: + net_arch = [64, 64] + + os.makedirs(output_dir, exist_ok=True) + tag = f"{algorithm}_{plant}_{disturbance}" + + env = DummyVecEnv( + [_make_env(plant, disturbance, env_kwargs) for _ in range(n_envs)] + ) + env = VecNormalize( + env, + norm_obs=normalize_obs, + norm_reward=normalize_reward, + clip_obs=clip_obs, + ) + + eval_env = SurfaceDiscoveryEnv( + plant=plant, disturbance=disturbance, **(env_kwargs or {}) + ) + + ise_cb = _ISECallback(eval_env, check_freq=eval_freq, verbose=1) + callbacks = [ise_cb] + + best_model_path = None + if save_best: + best_dir = os.path.join(output_dir, f"{tag}_best") + eval_vec_env = DummyVecEnv( + [_make_env(plant, disturbance, env_kwargs)] + ) + eval_vec_env = VecNormalize( + eval_vec_env, + norm_obs=normalize_obs, + norm_reward=normalize_reward, + clip_obs=clip_obs, + training=False, + ) + eval_cb = EvalCallback( + eval_vec_env, + best_model_save_path=best_dir, + log_path=best_dir, + eval_freq=eval_freq, + deterministic=True, + render=False, + ) + callbacks.append(eval_cb) + best_model_path = os.path.join(best_dir, "best_model.zip") + + policy_kwargs = {"net_arch": net_arch} + if algorithm == "PPO": + model = PPO( + "MlpPolicy", + env, + learning_rate=3e-4, + n_steps=512, + batch_size=64, + n_epochs=10, + gamma=0.99, + gae_lambda=0.95, + clip_range=0.2, + policy_kwargs=policy_kwargs, + verbose=0, + seed=seed, + ) + else: + model = SAC( + "MlpPolicy", + env, + learning_rate=3e-4, + batch_size=256, + buffer_size=100_000, + gamma=0.99, + policy_kwargs=policy_kwargs, + verbose=0, + seed=seed, + ) + + t0 = time.time() + model.learn(total_timesteps=total_timesteps, callback=callbacks) + elapsed = time.time() - t0 + + model_path = os.path.join(output_dir, f"{tag}.zip") + norm_path = os.path.join(output_dir, f"{tag}_vecnorm.pkl") + model.save(model_path) + env.save(norm_path) + + eval_env.close() + env.close() + + return TrainingResult( + model_path=model_path, + vecnorm_path=norm_path, + 
best_model_path=( + best_model_path + if save_best and best_model_path and os.path.exists(best_model_path) + else None + ), + reward_history=ise_cb.rewards, + ise_history=ise_cb.ises, + final_reward=ise_cb.rewards[-1] if ise_cb.rewards else 0.0, + final_ise=ise_cb.ises[-1] if ise_cb.ises else float("inf"), + training_time=elapsed, + algorithm=algorithm, + plant=plant, + disturbance=disturbance, + seed=seed, + ) + + +def train_all_surfaces( + plants=None, + algorithms=None, + disturbances=None, + total_timesteps=500_000, + output_dir="trained_models", + n_envs=4, + base_seed=42, + **kwargs, +): + if plants is None: + plants = [ + "double_integrator", "inverted_pendulum", + "crane", "quadrotor", "pmsm", + ] + if algorithms is None: + algorithms = ["PPO", "SAC"] + if disturbances is None: + disturbances = ["none", "constant", "sinusoidal", "step"] + + results = [] + i = 0 + for plant_name in plants: + for algo in algorithms: + for dist in disturbances: + result = train_surface( + plant=plant_name, + algorithm=algo, + disturbance=dist, + total_timesteps=total_timesteps, + output_dir=output_dir, + n_envs=n_envs, + seed=base_seed + i, + **kwargs, + ) + results.append(result) + i += 1 + return results diff --git a/python/tests/test_trainer.py b/python/tests/test_trainer.py new file mode 100644 index 0000000..52dce10 --- /dev/null +++ b/python/tests/test_trainer.py @@ -0,0 +1,115 @@ +"""Tests for RL surface trainer -- PPO/SAC training pipeline.""" + +import os +import numpy as np +import pytest + +pytest.importorskip("stable_baselines3") + + +class TestTrainSurface: + def test_ppo_training_completes(self, tmp_path): + from opensmc.rl.trainer import train_surface + result = train_surface( + plant="double_integrator", algorithm="PPO", + disturbance="none", total_timesteps=2048, + n_envs=1, output_dir=str(tmp_path), seed=42, + ) + assert os.path.exists(result.model_path) + assert os.path.exists(result.vecnorm_path) + assert result.algorithm == "PPO" + assert result.plant == 
"double_integrator" + assert result.training_time > 0 + + def test_sac_training_completes(self, tmp_path): + from opensmc.rl.trainer import train_surface + result = train_surface( + plant="double_integrator", algorithm="SAC", + disturbance="none", total_timesteps=2048, + n_envs=1, output_dir=str(tmp_path), seed=42, + ) + assert os.path.exists(result.model_path) + assert result.algorithm == "SAC" + + def test_model_file_naming(self, tmp_path): + from opensmc.rl.trainer import train_surface + result = train_surface( + plant="inverted_pendulum", algorithm="PPO", + disturbance="sinusoidal", total_timesteps=2048, + n_envs=1, output_dir=str(tmp_path), seed=42, + ) + assert "PPO_inverted_pendulum_sinusoidal" in result.model_path + + def test_reward_history_populated(self, tmp_path): + from opensmc.rl.trainer import train_surface + result = train_surface( + plant="double_integrator", algorithm="PPO", + disturbance="none", total_timesteps=5000, + n_envs=1, output_dir=str(tmp_path), + eval_freq=2000, seed=42, + ) + assert len(result.reward_history) >= 1 + assert len(result.ise_history) >= 1 + + def test_invalid_algorithm_raises(self, tmp_path): + from opensmc.rl.trainer import train_surface + with pytest.raises(ValueError, match="Unknown algorithm"): + train_surface( + plant="double_integrator", algorithm="TD3", + total_timesteps=1000, output_dir=str(tmp_path), + ) + + def test_vecnorm_stats_saved(self, tmp_path): + from opensmc.rl.trainer import train_surface + result = train_surface( + plant="double_integrator", algorithm="PPO", + disturbance="none", total_timesteps=2048, + n_envs=1, output_dir=str(tmp_path), seed=42, + ) + assert result.vecnorm_path.endswith(".pkl") + assert os.path.getsize(result.vecnorm_path) > 0 + + +class TestTrainAllSurfaces: + def test_trains_multiple_models(self, tmp_path): + from opensmc.rl.trainer import train_all_surfaces + results = train_all_surfaces( + plants=["double_integrator"], + algorithms=["PPO"], + disturbances=["none", "constant"], + 
total_timesteps=2048, + output_dir=str(tmp_path), + n_envs=1, base_seed=42, + ) + assert len(results) == 2 + assert all(os.path.exists(r.model_path) for r in results) + + def test_seed_propagation(self, tmp_path): + from opensmc.rl.trainer import train_all_surfaces + results = train_all_surfaces( + plants=["double_integrator"], + algorithms=["PPO"], + disturbances=["none", "constant"], + total_timesteps=2048, + output_dir=str(tmp_path), + n_envs=1, base_seed=42, + ) + assert results[0].model_path != results[1].model_path + + +class TestTrainedModelLoading: + def test_load_trained_model(self, tmp_path): + from opensmc.rl.trainer import train_surface + from opensmc.rl import RLDiscoveredSurface + + result = train_surface( + plant="double_integrator", algorithm="PPO", + disturbance="none", total_timesteps=2048, + n_envs=1, output_dir=str(tmp_path), seed=42, + ) + surface = RLDiscoveredSurface( + result.model_path, + obs_fn=lambda e, ed, t: np.array([e, ed, 0.0, 0.0], dtype=np.float32) + ) + s = surface.compute(1.0, 0.5, 0.0) + assert np.isfinite(s) From 347f96c529a180ee07c406663239fdb8407856d7 Mon Sep 17 00:00:00 2001 From: a2z Date: Thu, 19 Mar 2026 11:41:30 +0300 Subject: [PATCH 06/10] feat: add SAC loading fallback and 4D obs padding to RLDiscoveredSurface - _load_sb3 now tries PPO first then falls back to SAC on any exception - use_4d_obs parameter pads observations to [e, edot, 0.0, 0.0] for models trained with 4-element observation spaces - 2 new tests covering both behaviours (11 total, all passing) Co-Authored-By: Claude Sonnet 4.6 --- python/opensmc/rl/rl_surface.py | 19 ++++++++++++++----- python/tests/test_rl_surface.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/python/opensmc/rl/rl_surface.py b/python/opensmc/rl/rl_surface.py index 5a68593..da6f9fc 100644 --- a/python/opensmc/rl/rl_surface.py +++ b/python/opensmc/rl/rl_surface.py @@ -40,9 +40,10 @@ class RLDiscoveredSurface(SlidingSurface): Scale 
factor applied to the RL output. Default 1.0. """ - def __init__(self, model, obs_fn=None, scale=1.0): + def __init__(self, model, obs_fn=None, scale=1.0, use_4d_obs=False): self.scale = scale self._obs_fn = obs_fn + self._use_4d_obs = use_4d_obs if isinstance(model, str): self._model = self._load_sb3(model) @@ -56,14 +57,17 @@ def __init__(self, model, obs_fn=None, scale=1.0): @staticmethod def _load_sb3(path): - """Load a Stable-Baselines3 model from file.""" + """Load a Stable-Baselines3 model (PPO or SAC) from file.""" try: - from stable_baselines3 import PPO - return PPO.load(path) + from stable_baselines3 import PPO, SAC except ImportError: raise ImportError( "stable-baselines3 is required to load SB3 models. " "Install with: pip install opensmc[rl]") + try: + return PPO.load(path) + except Exception: + return SAC.load(path) def _sb3_predict(self, obs): """Predict using SB3 model (deterministic).""" @@ -79,12 +83,17 @@ def _make_obs(self, e, edot, t): edot_scalar = float(edot) if np.ndim(edot) == 0 else edot if np.ndim(e_scalar) == 0: + if self._use_4d_obs: + return np.array([e_scalar, edot_scalar, 0.0, 0.0], dtype=np.float32) return np.array([e_scalar, edot_scalar], dtype=np.float32) else: - return np.concatenate([ + base = np.concatenate([ np.atleast_1d(e_scalar), np.atleast_1d(edot_scalar) ]).astype(np.float32) + if self._use_4d_obs: + return np.concatenate([base, np.zeros(2, dtype=np.float32)]) + return base def compute(self, e, edot, t=0.0, **kwargs): """Compute sliding variable using the RL policy. 
diff --git a/python/tests/test_rl_surface.py b/python/tests/test_rl_surface.py index 1a83fec..c763855 100644 --- a/python/tests/test_rl_surface.py +++ b/python/tests/test_rl_surface.py @@ -70,3 +70,32 @@ def test_zero_output(self): """Zero model output should give zero sigma.""" surface = RLDiscoveredSurface(model=lambda obs: np.array([0.0])) assert surface.compute(1.0, 1.0) == pytest.approx(0.0) + + +def test_4d_obs_padding(): + """RLDiscoveredSurface pads to 4D when use_4d_obs=True.""" + from opensmc.rl import RLDiscoveredSurface + import numpy as np + obs_log = [] + def model_fn(obs): + obs_log.append(obs.copy()) + return np.array([obs[0] * 2]) + surface = RLDiscoveredSurface(model_fn, use_4d_obs=True) + s = surface.compute(1.5, 0.3) + assert len(obs_log[-1]) == 4 + assert obs_log[-1][2] == 0.0 + assert obs_log[-1][3] == 0.0 + assert abs(s - 3.0) < 1e-6 + + +def test_4d_obs_disabled_by_default(): + """Default behavior still produces 2D observations.""" + from opensmc.rl import RLDiscoveredSurface + import numpy as np + obs_log = [] + def model_fn(obs): + obs_log.append(obs.copy()) + return np.array([obs[0]]) + surface = RLDiscoveredSurface(model_fn) + surface.compute(1.0, 0.5) + assert len(obs_log[-1]) == 2 From 36f3733f6dae6d8883d62d03505375e38084dd65 Mon Sep 17 00:00:00 2001 From: a2z Date: Thu, 19 Mar 2026 11:50:35 +0300 Subject: [PATCH 07/10] feat: add benchmark suite -- RL vs 17 controllers x 5 plants Two-table benchmark comparing RL-discovered surfaces against all 17 classical controllers across 5 plants and 4 disturbance types: - Table 1 (Fixed Law): surfaces evaluated with u = -K*sat(s/phi) - lam*s using standalone RK4 loop matching training environment - Table 2 (Matched Controller): each controller uses its own compute() via Simulator.run() for practical performance comparison - BenchmarkResults dataclass with to_latex(), to_json(), summary() - 9/9 tests passing Co-Authored-By: Claude Opus 4.6 (1M context) --- python/opensmc/rl/benchmark.py | 642 
+++++++++++++++++++++++++++++++++ python/tests/test_benchmark.py | 114 ++++++ 2 files changed, 756 insertions(+) create mode 100644 python/opensmc/rl/benchmark.py create mode 100644 python/tests/test_benchmark.py diff --git a/python/opensmc/rl/benchmark.py b/python/opensmc/rl/benchmark.py new file mode 100644 index 0000000..fc62d94 --- /dev/null +++ b/python/opensmc/rl/benchmark.py @@ -0,0 +1,642 @@ +"""Benchmark suite -- compare RL-discovered surfaces against 17 classical controllers. + +Table 1 (Fixed Switching Law): All surfaces evaluated with the same control law + u = -K*sat(s/phi) - lam*s, using RK4 integration. This isolates surface quality. + +Table 2 (Matched Controller): Each controller uses its own compute() method via + Simulator.run(). This measures practical performance. + +Usage +----- +>>> from opensmc.rl.benchmark import benchmark +>>> results = benchmark(plants=["double_integrator"], disturbances=["none"]) +>>> print(results.summary()) +>>> results.to_json("benchmark_results.json") +""" + +import json +import warnings +from dataclasses import dataclass, field + +import numpy as np +import pandas as pd + +from opensmc import metrics +from opensmc.plants import ( + DoubleIntegrator, + InvertedPendulum, + SinglePendulumCrane, + DualStageNanopositioner, + TwoLinkArm, +) +from opensmc.surfaces import ( + LinearSurface, + TerminalSurface, + NonsingularTerminalSurface, + FastTerminalSurface, + IntegralTerminalSurface, + IntegralSlidingSurface, + PIDSurface, + HierarchicalSurface, + NonlinearDampingSurface, + GlobalSurface, + PredefinedTimeSurface, +) +from opensmc.controllers import ( + ClassicalSMC, + AdaptiveSMC, + DynamicSMC, + ITSMC, + NFTSMC, + FixedTimeSMC, + FuzzySMC, + DiscreteSMC, + CombiningHSMC, + AggregatedHSMC, + IncrementalHSMC, + TwistingSMC, + QuasiContinuous2SMC, + NestedHOSMC, + QuasiContinuousHOSMC, + PID, + LQR, +) +from opensmc.simulator import Simulator + + +PLANT_REGISTRY = { + "double_integrator": DoubleIntegrator, + 
"inverted_pendulum": InvertedPendulum, + "crane": SinglePendulumCrane, + "nanopositioner": DualStageNanopositioner, + "two_link_arm": TwoLinkArm, +} + +DISTURBANCE_REGISTRY = { + "none": lambda t: 0.0, + "constant": lambda t: 1.0, + "sinusoidal": lambda t: np.sin(5.0 * t), + "step": lambda t: 1.0 if t >= 0.5 else 0.0, +} + + +def _sat(y): + """Saturation function: clip to [-1, 1].""" + return np.clip(y, -1.0, 1.0) + + +def _rk4_step(f, t, x, dt): + """Single RK4 integration step.""" + k1 = f(t, x) + k2 = f(t + dt / 2, x + dt / 2 * k1) + k3 = f(t + dt / 2, x + dt / 2 * k2) + k4 = f(t + dt, x + dt * k3) + return x + (dt / 6.0) * (k1 + 2 * k2 + 2 * k3 + k4) + + +def _make_result_dict(t_arr, x_arr, u_arr, xref_arr, s_arr): + """Build a metrics-compatible result dict. + + Ensures all arrays are 2D with proper shapes: + - t: (N,) + - x, xref, e: (N, n_states) + - u: (N, n_inputs) + - s: (N, n_s) + """ + t = np.asarray(t_arr) + x = np.atleast_2d(np.asarray(x_arr)) + xref = np.atleast_2d(np.asarray(xref_arr)) + u = np.atleast_2d(np.asarray(u_arr)) + s = np.atleast_2d(np.asarray(s_arr)) + + if x.shape[0] == 1 and len(t) > 1: + x = x.T + if xref.shape[0] == 1 and len(t) > 1: + xref = xref.T + if u.shape[0] == 1 and len(t) > 1: + u = u.T + if s.shape[0] == 1 and len(t) > 1: + s = s.T + + if u.ndim == 1: + u = u.reshape(-1, 1) + if s.ndim == 1: + s = s.reshape(-1, 1) + + return { + "t": t, + "x": x, + "u": u, + "xref": xref, + "e": xref - x, + "s": s, + } + + +def _compute_metrics(result): + """Compute the 6 key benchmark metrics from a result dict.""" + out = {} + try: + out["ise"] = metrics.ise(result, axis=0) + except Exception: + out["ise"] = float("inf") + try: + out["itae"] = metrics.itae(result, axis=0) + except Exception: + out["itae"] = float("inf") + try: + out["settling_time"] = metrics.settling_time(result, axis=0) + except Exception: + out["settling_time"] = float(result["t"][-1]) + try: + out["overshoot"] = metrics.overshoot(result, axis=0) + except Exception: + 
out["overshoot"] = float("inf") + try: + out["chattering_index"] = metrics.chattering_index(result, axis=0) + except Exception: + out["chattering_index"] = float("inf") + try: + out["ss_error"] = metrics.ss_error(result, axis=0) + except Exception: + out["ss_error"] = float("inf") + return out + + +def _simulate_fixed_law(surface, plant, T, dt, gains, ref, dist_fn=None): + """Simulate a surface with the fixed control law from SurfaceDiscoveryEnv. + + Control law: u = -K * sat(s/phi) - lam * s + + Uses its own RK4 loop (NOT Simulator.run) to match the training environment + exactly. + + Parameters + ---------- + surface : SlidingSurface + The surface to evaluate. + plant : Plant + The plant dynamics. + T : float + Simulation time. + dt : float + Time step. + gains : dict + Keys: K, lam, phi. + ref : float + Step reference value. + dist_fn : callable or None + Disturbance function d(t). + + Returns + ------- + dict with metric keys: ise, itae, settling_time, overshoot, + chattering_index, ss_error. 
+ """ + K = gains.get("K", 5.0) + lam = gains.get("lam", 3.0) + phi = gains.get("phi", 0.05) + + if dist_fn is None: + dist_fn = lambda t: 0.0 + + N = int(T / dt) + n = plant.n_states + + x = plant.get_default_x0().copy() + + xref = np.zeros(n) + xref[0] = ref + + t_arr = np.zeros(N + 1) + x_arr = np.zeros((N + 1, n)) + u_arr = np.zeros((N + 1, 1)) + s_arr = np.zeros((N + 1, 1)) + xref_arr = np.tile(xref, (N + 1, 1)) + + if hasattr(surface, "reset"): + surface.reset() + + for i in range(N + 1): + t = i * dt + e = xref[0] - x[0] + edot = (xref[1] - x[1]) if n > 1 else -x[1] + + s = surface.compute(e, edot, t=t) + + u_val = -K * _sat(s / phi) - lam * s + + t_arr[i] = t + x_arr[i] = x.copy() + u_arr[i, 0] = u_val + s_arr[i, 0] = s + + if i < N: + d = dist_fn(t) + d_arr = np.zeros(n) + if np.ndim(d) == 0: + if n > 1: + d_arr[1] = float(d) + else: + d_arr[0] = float(d) + else: + d_arr[: min(len(d), n)] = np.asarray(d)[: min(len(d), n)] + + def f_dyn(tt, xx, uu=u_val, dd=d_arr): + return plant.dynamics(tt, xx, np.array([uu]), dd) + + x = _rk4_step(f_dyn, t, x, dt) + + result = _make_result_dict(t_arr, x_arr, u_arr, xref_arr, s_arr) + return _compute_metrics(result) + + +def _get_classical_surfaces(): + """Return dict of all 11 classical surfaces with default parameters.""" + return { + "Linear": LinearSurface(c=10.0), + "Terminal": TerminalSurface(beta=10.0, p=5, q=7), + "NonsingularTerminal": NonsingularTerminalSurface(beta=10.0, p=5, q=7), + "FastTerminal": FastTerminalSurface(alpha=2.0, beta=1.0, p=5, q=9), + "IntegralTerminal": IntegralTerminalSurface(c1=10.0, c2=5.0, p=5, q=7), + "IntegralSliding": IntegralSlidingSurface(c=10.0), + "PID": PIDSurface(alpha=1.0, beta=10.0, gamma=1.0), + "Hierarchical": HierarchicalSurface(c1=10.0, c2=10.0, lam=1.0), + "NonlinearDamping": NonlinearDampingSurface(c=10.0, beta=7.9), + "Global": GlobalSurface(c=10.0, alpha=5.0), + "PredefinedTime": PredefinedTimeSurface(Tc=2.0, c_inf=10.0), + } + + +def _get_controller_configs(plant): + 
"""Return dict of 17 controller instances with best-matched surfaces. + + Parameters + ---------- + plant : Plant + Used to configure plant-specific controllers (LQR). + + Returns + ------- + dict mapping controller_name -> Controller instance + """ + n = plant.n_states + dt = 0.01 + + configs = {} + + configs["ClassicalSMC"] = ClassicalSMC( + surface=LinearSurface(c=10.0), k=10.0, eta=1.1 + ) + configs["AdaptiveSMC"] = AdaptiveSMC( + surface=NonlinearDampingSurface(c=10.0, beta=7.9), + gamma=100.0, eta0=0.1, lam=5.0, dt=dt, + ) + configs["DynamicSMC"] = DynamicSMC( + surface=LinearSurface(c=10.0), K=20.0, lam=10.0, q=5.0, dt=dt, + ) + configs["ITSMC"] = ITSMC( + surface=IntegralTerminalSurface(c1=10.0, c2=5.0, p=5, q=7, dt=dt), + c1=5.0, c2=2.0, K=5.0, lambda_s=3.0, phi=0.05, dt=dt, + ) + configs["NFTSMC"] = NFTSMC( + surface=NonsingularTerminalSurface(beta=10.0, p=5, q=7), + alpha=5.0, beta=2.0, gamma=1.4, + k1=10.0, k2=3.0, rho=0.714, eta=1.0, dt=dt, + ) + configs["FixedTimeSMC"] = FixedTimeSMC( + surface=PredefinedTimeSurface(Tc=2.0, c_inf=10.0), + alpha=5.0, beta=5.0, p=0.5, q=1.5, + ) + configs["FuzzySMC"] = FuzzySMC( + surface=LinearSurface(c=10.0), k=15.0, dt=dt, + ) + configs["DiscreteSMC"] = DiscreteSMC( + surface=LinearSurface(c=10.0), q_rate=10.0, epsilon=5.0, Ts=dt, + ) + configs["CombiningHSMC"] = CombiningHSMC( + c=0.01, alpha=0.28, kappa=1.0, eta=0.025, + ) + configs["AggregatedHSMC"] = AggregatedHSMC( + c1=0.7, c2=8.2, alpha=-2.3, kappa=3.0, eta=0.1, + ) + configs["IncrementalHSMC"] = IncrementalHSMC( + c1=0.7, c2=2.0, c3=0.3, kappa=5.0, eta=0.5, + ) + configs["TwistingSMC"] = TwistingSMC( + r1=10.0, r2=5.0, c=1.0, surface=LinearSurface(c=10.0), + ) + configs["QuasiContinuous2SMC"] = QuasiContinuous2SMC( + alpha=10.0, beta=2.0, c=1.0, surface=LinearSurface(c=10.0), + ) + configs["NestedHOSMC"] = NestedHOSMC(order=2, alpha=10.0, c=1.0) + configs["QuasiContinuousHOSMC"] = QuasiContinuousHOSMC( + order=2, alpha=10.0, c=1.0, + ) + configs["PID"] = 
PID(Kp=10.0, Kd=5.0, Ki=0.5, dt=dt) + + A = np.zeros((n, n)) + if n >= 2: + A[0, 1] = 1.0 + B = np.zeros((n, 1)) + if n >= 2: + B[1, 0] = 1.0 + elif n == 1: + B[0, 0] = 1.0 + Q = np.eye(n) * 10.0 + R = np.eye(1) + configs["LQR"] = LQR(A=A, B=B, Q=Q, R=R) + + return configs + + +def _is_crane_controller(name): + """Check if a controller requires crane-like 4-state dynamics.""" + return name in ("CombiningHSMC", "AggregatedHSMC", "IncrementalHSMC") + + +def _simulate_matched_controller(controller_name, plant, T, dt, ref, dist_fn=None): + """Simulate a specific controller from the factory on a given plant. + + Uses Simulator.run() with each controller's own compute() method. + + Parameters + ---------- + controller_name : str + Name of the controller (must be a key in _get_controller_configs). + plant : Plant + The plant to simulate. + T : float + Simulation time. + dt : float + Time step. + ref : float + Step reference value. + dist_fn : callable or None + Disturbance function. + + Returns + ------- + dict with metric keys. 
+ """ + configs = _get_controller_configs(plant) + if controller_name not in configs: + return { + "ise": float("inf"), + "itae": float("inf"), + "settling_time": T, + "overshoot": float("inf"), + "chattering_index": float("inf"), + "ss_error": float("inf"), + } + + controller = configs[controller_name] + + n = plant.n_states + + if _is_crane_controller(controller_name) and n < 4: + return { + "ise": float("nan"), + "itae": float("nan"), + "settling_time": float("nan"), + "overshoot": float("nan"), + "chattering_index": float("nan"), + "ss_error": float("nan"), + } + + xref = np.zeros(n) + xref[0] = ref + + def ref_fn(t): + return xref + + sim = Simulator(dt=dt, T=T, method="rk4") + + try: + controller.reset() + result = sim.run(controller, plant, ref_fn=ref_fn, dist_fn=dist_fn) + return _compute_metrics(result) + except Exception as exc: + warnings.warn( + f"Controller {controller_name} failed on {plant.name}: {exc}", + stacklevel=2, + ) + return { + "ise": float("inf"), + "itae": float("inf"), + "settling_time": T, + "overshoot": float("inf"), + "chattering_index": float("inf"), + "ss_error": float("inf"), + } + + +@dataclass +class BenchmarkResults: + """Container for benchmark results with export methods.""" + + table1: pd.DataFrame = field(default_factory=pd.DataFrame) + table2: pd.DataFrame = field(default_factory=pd.DataFrame) + fingerprints: dict = field(default_factory=dict) + rankings: dict = field(default_factory=dict) + + def to_latex(self): + """Export Table 1 as LaTeX tabular string.""" + if self.table1 is not None and len(self.table1) > 0: + return self.table1.to_latex( + index=True, float_format="%.4f", escape=False + ) + return "" + + def to_json(self, path): + """Export all results to JSON file.""" + t1 = self.table1.reset_index().to_dict(orient="records") if self.table1 is not None and len(self.table1) > 0 else [] + t2 = self.table2.reset_index().to_dict(orient="records") if self.table2 is not None and len(self.table2) > 0 else [] + data = { + 
"table1": t1, + "table2": t2, + "fingerprints": self.fingerprints, + "rankings": self.rankings, + } + + def _convert(obj): + if isinstance(obj, (np.integer,)): + return int(obj) + if isinstance(obj, (np.floating,)): + return float(obj) + if isinstance(obj, np.ndarray): + return obj.tolist() + if isinstance(obj, float) and (np.isnan(obj) or np.isinf(obj)): + return str(obj) + raise TypeError(f"Object of type {type(obj)} is not JSON serializable") + + with open(path, "w") as f: + json.dump(data, f, indent=2, default=_convert) + + def summary(self): + """Generate a text summary of results.""" + lines = [] + lines.append("=== OpenSMC Benchmark Results ===") + + if self.table1 is not None and len(self.table1) > 0: + lines.append(f"\nTable 1 (Fixed Law): {len(self.table1)} entries") + if "ise" in self.table1.columns: + best_idx = self.table1["ise"].idxmin() + best_ise = self.table1.loc[best_idx, "ise"] + lines.append(f" Best surface (ISE): {best_idx} = {best_ise:.6f}") + + if self.table2 is not None and len(self.table2) > 0: + lines.append(f"\nTable 2 (Matched Controller): {len(self.table2)} entries") + if "ise" in self.table2.columns: + valid = self.table2["ise"].replace([np.inf, -np.inf], np.nan).dropna() + if len(valid) > 0: + best_idx = valid.idxmin() + best_ise = valid[best_idx] + lines.append( + f" Best controller (ISE): {best_idx} = {best_ise:.6f}" + ) + + if self.rankings: + lines.append("\nRankings (by ISE):") + for plant_name, ranking in self.rankings.items(): + lines.append(f" {plant_name}: {ranking[:3]}") + + return "\n".join(lines) + + +def benchmark( + plants=None, + trained_models_dir=None, + algorithms=None, + disturbances=None, + T=5.0, + dt=0.01, + ref=1.0, + gains=None, + include_matched=True, +): + """Run the full benchmark: RL surfaces vs 17 classical controllers. + + Parameters + ---------- + plants : list of str or None + Plant names from PLANT_REGISTRY. None = all 5. + trained_models_dir : str or None + Directory containing trained RL models. 
None = skip RL surfaces. + algorithms : list of str or None + RL algorithms to include (e.g., ["PPO", "SAC"]). Empty list = skip RL. + disturbances : list of str or None + Disturbance types from DISTURBANCE_REGISTRY. None = all 4. + T : float + Simulation time. + dt : float + Time step. + ref : float + Step reference value. + gains : dict or None + Fixed-law gains {K, lam, phi}. None = defaults. + include_matched : bool + Whether to generate Table 2 (matched controllers). + + Returns + ------- + BenchmarkResults + """ + if plants is None: + plants = list(PLANT_REGISTRY.keys()) + if disturbances is None: + disturbances = list(DISTURBANCE_REGISTRY.keys()) + if algorithms is None: + algorithms = [] + if gains is None: + gains = {"K": 5.0, "lam": 3.0, "phi": 0.05} + + table1_rows = [] + table2_rows = [] + rankings = {} + + for plant_name in plants: + if plant_name not in PLANT_REGISTRY: + warnings.warn(f"Unknown plant: {plant_name}", stacklevel=2) + continue + + plant = PLANT_REGISTRY[plant_name]() + + for dist_name in disturbances: + dist_fn = DISTURBANCE_REGISTRY.get(dist_name, lambda t: 0.0) + + surfaces = _get_classical_surfaces() + for surf_name, surface in surfaces.items(): + if hasattr(surface, "reset"): + surface.reset() + try: + result = _simulate_fixed_law( + surface=surface, + plant=plant, + T=T, + dt=dt, + gains=gains, + ref=ref, + dist_fn=dist_fn, + ) + row = { + "plant": plant_name, + "disturbance": dist_name, + "surface": surf_name, + **result, + } + table1_rows.append(row) + except Exception as exc: + warnings.warn( + f"Surface {surf_name} failed on {plant_name}/{dist_name}: {exc}", + stacklevel=2, + ) + + if include_matched: + ctrl_configs = _get_controller_configs(plant) + plant_rankings = [] + + for ctrl_name in ctrl_configs: + try: + result = _simulate_matched_controller( + controller_name=ctrl_name, + plant=plant, + T=T, + dt=dt, + ref=ref, + dist_fn=dist_fn, + ) + row = { + "plant": plant_name, + "disturbance": dist_name, + "controller": 
ctrl_name, + **result, + } + table2_rows.append(row) + if np.isfinite(result.get("ise", float("inf"))): + plant_rankings.append((ctrl_name, result["ise"])) + except Exception as exc: + warnings.warn( + f"Controller {ctrl_name} failed on " + f"{plant_name}/{dist_name}: {exc}", + stacklevel=2, + ) + + plant_rankings.sort(key=lambda x: x[1]) + key = f"{plant_name}/{dist_name}" + rankings[key] = [name for name, _ in plant_rankings] + + table1 = pd.DataFrame(table1_rows) + if len(table1) > 0: + table1 = table1.set_index(["plant", "disturbance", "surface"]) + + table2 = pd.DataFrame(table2_rows) if include_matched else pd.DataFrame() + if len(table2) > 0: + table2 = table2.set_index(["plant", "disturbance", "controller"]) + + return BenchmarkResults( + table1=table1, + table2=table2, + fingerprints={}, + rankings=rankings, + ) diff --git a/python/tests/test_benchmark.py b/python/tests/test_benchmark.py new file mode 100644 index 0000000..e27abcf --- /dev/null +++ b/python/tests/test_benchmark.py @@ -0,0 +1,114 @@ +"""Tests for benchmark module -- RL vs classical controllers.""" + +import os +import numpy as np +import pytest + +pytest.importorskip("pandas") + + +class TestBenchmarkFixedLaw: + def test_simulate_single_surface(self): + from opensmc.rl.benchmark import _simulate_fixed_law + from opensmc.surfaces import LinearSurface + from opensmc.plants import DoubleIntegrator + result = _simulate_fixed_law( + surface=LinearSurface(c=10), + plant=DoubleIntegrator(), + T=1.0, dt=0.01, + gains={"K": 5, "lam": 3, "phi": 0.05}, + ref=1.0, + ) + assert "ise" in result + assert "settling_time" in result + assert result["ise"] > 0 + assert np.isfinite(result["ise"]) + + def test_all_surfaces_run(self): + from opensmc.rl.benchmark import _get_classical_surfaces + surfaces = _get_classical_surfaces() + assert len(surfaces) == 11 + + +class TestBenchmarkMatchedController: + def test_simulate_single_controller(self): + from opensmc.rl.benchmark import _simulate_matched_controller + 
from opensmc.plants import DoubleIntegrator + result = _simulate_matched_controller( + controller_name="ClassicalSMC", + plant=DoubleIntegrator(), + T=1.0, dt=0.01, ref=1.0, + ) + assert "ise" in result + assert np.isfinite(result["ise"]) + + def test_controller_factory_returns_17(self): + from opensmc.rl.benchmark import _get_controller_configs + from opensmc.plants import DoubleIntegrator + configs = _get_controller_configs(DoubleIntegrator()) + assert len(configs) == 17 + + +class TestBenchmarkFull: + def test_benchmark_mini(self): + from opensmc.rl.benchmark import benchmark + results = benchmark( + plants=["double_integrator"], + trained_models_dir=None, + algorithms=[], + disturbances=["none"], + T=1.0, dt=0.01, + include_matched=False, + ) + assert results.table1 is not None + assert len(results.table1) > 0 + + def test_benchmark_with_matched(self): + from opensmc.rl.benchmark import benchmark + results = benchmark( + plants=["double_integrator"], + trained_models_dir=None, + algorithms=[], + disturbances=["none"], + T=1.0, dt=0.01, + include_matched=True, + ) + assert results.table2 is not None + assert len(results.table2) > 0 + + +class TestBenchmarkOutput: + def test_to_latex(self): + from opensmc.rl.benchmark import benchmark + results = benchmark( + plants=["double_integrator"], + trained_models_dir=None, algorithms=[], + disturbances=["none"], T=1.0, dt=0.01, + include_matched=False, + ) + latex = results.to_latex() + assert "tabular" in latex or "\\begin" in latex + + def test_to_json(self, tmp_path): + from opensmc.rl.benchmark import benchmark + results = benchmark( + plants=["double_integrator"], + trained_models_dir=None, algorithms=[], + disturbances=["none"], T=1.0, dt=0.01, + include_matched=False, + ) + path = str(tmp_path / "results.json") + results.to_json(path) + assert os.path.exists(path) + + def test_summary_string(self): + from opensmc.rl.benchmark import benchmark + results = benchmark( + plants=["double_integrator"], + 
trained_models_dir=None, algorithms=[], + disturbances=["none"], T=1.0, dt=0.01, + include_matched=False, + ) + s = results.summary() + assert isinstance(s, str) + assert len(s) > 0 From 55c5f9035857d271214f3c996ae4d85badbf8e0d Mon Sep 17 00:00:00 2001 From: a2z Date: Thu, 19 Mar 2026 11:54:04 +0300 Subject: [PATCH 08/10] =?UTF-8?q?feat:=20add=20RL=20visualization=20module?= =?UTF-8?q?=20=E2=80=94=20heatmaps,=20radar,=20contours,=20bars?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 7 plotting functions (training_curve, surface_heatmap, contour_overlay, radar_chart, cross_plant_radar, benchmark_bars, time_domain) with corresponding 7 pytest tests using Agg backend. Co-Authored-By: Claude Sonnet 4.6 --- python/opensmc/rl/visualize.py | 150 +++++++++++++++++++++++++++++++++ python/tests/test_visualize.py | 98 +++++++++++++++++++++ 2 files changed, 248 insertions(+) create mode 100644 python/opensmc/rl/visualize.py create mode 100644 python/tests/test_visualize.py diff --git a/python/opensmc/rl/visualize.py b/python/opensmc/rl/visualize.py new file mode 100644 index 0000000..755036a --- /dev/null +++ b/python/opensmc/rl/visualize.py @@ -0,0 +1,150 @@ +"""Visualization — publication-quality plots for RL surface analysis. + +All functions return matplotlib.Figure. Optional save_path for PNG/PDF export. +""" + +import numpy as np + +try: + import matplotlib + import matplotlib.pyplot as plt +except ImportError: + raise ImportError("matplotlib is required. 
Install with: pip install opensmc[viz]") + + +def _save_and_return(fig, save_path): + if save_path: + fig.savefig(save_path, dpi=300, bbox_inches="tight") + return fig + + +def training_curve(reward_history, ise_history, eval_freq=10_000, save_path=None): + steps = [eval_freq * (i + 1) for i in range(len(reward_history))] + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) + ax1.plot(steps, reward_history, "b-", linewidth=1.5) + ax1.set_xlabel("Training steps") + ax1.set_ylabel("Cumulative reward") + ax1.set_title("Reward") + ax1.grid(True, alpha=0.3) + ax2.plot(steps, ise_history, "r-", linewidth=1.5) + ax2.set_xlabel("Training steps") + ax2.set_ylabel("ISE") + ax2.set_title("Integrated Squared Error") + ax2.grid(True, alpha=0.3) + fig.tight_layout() + return _save_and_return(fig, save_path) + + +def surface_heatmap(sigma_fn=None, model_path=None, e_range=(-3, 3), + edot_range=(-3, 3), resolution=100, save_path=None): + if sigma_fn is None and model_path is not None: + from opensmc.rl import RLDiscoveredSurface + surface = RLDiscoveredSurface(model_path, use_4d_obs=True) + sigma_fn = lambda e, ed: surface.compute(e, ed) + e = np.linspace(*e_range, resolution) + ed = np.linspace(*edot_range, resolution) + E, ED = np.meshgrid(e, ed) + S = np.vectorize(sigma_fn)(E, ED) + fig, ax = plt.subplots(figsize=(7, 5)) + im = ax.pcolormesh(E, ED, S, cmap="RdBu_r", shading="auto") + ax.contour(E, ED, S, levels=[0], colors="k", linewidths=2) + fig.colorbar(im, ax=ax, label=r"$\sigma(e, \dot{e})$") + ax.set_xlabel(r"$e$") + ax.set_ylabel(r"$\dot{e}$") + ax.set_title("Discovered Sliding Surface") + return _save_and_return(fig, save_path) + + +def contour_overlay(surfaces, e_range=(-3, 3), edot_range=(-3, 3), + resolution=100, save_path=None): + e = np.linspace(*e_range, resolution) + ed = np.linspace(*edot_range, resolution) + E, ED = np.meshgrid(e, ed) + fig, ax = plt.subplots(figsize=(7, 5)) + colors = plt.cm.tab10(np.linspace(0, 1, len(surfaces))) + for (name, surface), 
color in zip(surfaces.items(), colors): + S = np.vectorize(lambda ei, edi: surface.compute(ei, edi))(E, ED) + ax.contour(E, ED, S, levels=[0], colors=[color], linewidths=1.5) + ax.plot([], [], color=color, label=name, linewidth=1.5) + ax.set_xlabel(r"$e$") + ax.set_ylabel(r"$\dot{e}$") + ax.set_title("Sliding Manifold Comparison (s=0)") + ax.legend(loc="upper right", fontsize=8) + ax.grid(True, alpha=0.3) + return _save_and_return(fig, save_path) + + +def radar_chart(scores, title="Fingerprint", save_path=None): + labels = list(scores.keys()) + values = list(scores.values()) + N = len(labels) + angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist() + values_closed = values + [values[0]] + angles_closed = angles + [angles[0]] + fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={"projection": "polar"}) + ax.plot(angles_closed, values_closed, "b-", linewidth=2) + ax.fill(angles_closed, values_closed, alpha=0.25, color="b") + ax.set_xticks(angles) + ax.set_xticklabels(labels, fontsize=8) + ax.set_ylim(0, 1) + ax.set_title(title, pad=20) + return _save_and_return(fig, save_path) + + +def cross_plant_radar(fingerprints_by_plant, save_path=None): + n = len(fingerprints_by_plant) + fig, axes = plt.subplots(1, n, figsize=(5 * n, 5), + subplot_kw={"projection": "polar"}) + if n == 1: + axes = [axes] + for ax, (plant, scores) in zip(axes, fingerprints_by_plant.items()): + labels = list(scores.keys()) + values = list(scores.values()) + N = len(labels) + angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist() + values_closed = values + [values[0]] + angles_closed = angles + [angles[0]] + ax.plot(angles_closed, values_closed, "b-", linewidth=2) + ax.fill(angles_closed, values_closed, alpha=0.25, color="b") + ax.set_xticks(angles) + ax.set_xticklabels(labels, fontsize=7) + ax.set_ylim(0, 1) + ax.set_title(plant, pad=15) + fig.tight_layout() + return _save_and_return(fig, save_path) + + +def benchmark_bars(data, metric="ise", title=None, save_path=None): + fig, ax = 
plt.subplots(figsize=(10, 5)) + names = data.index.tolist() + values = data[metric].values + colors = ["#e74c3c" if "RL" in str(n) else "#3498db" for n in names] + ax.barh(names, values, color=colors) + ax.set_xlabel(metric.upper()) + ax.set_title(title or f"Benchmark: {metric.upper()}") + ax.invert_yaxis() + fig.tight_layout() + return _save_and_return(fig, save_path) + + +def time_domain(results_dict, top_n=5, save_path=None): + fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 8), sharex=True) + colors = plt.cm.tab10(np.linspace(0, 1, min(top_n, len(results_dict)))) + for (name, res), color in zip(list(results_dict.items())[:top_n], colors): + t = res["t"] + ax1.plot(t, res["e"][:, 0] if res["e"].ndim > 1 else res["e"], + color=color, label=name, linewidth=1.2) + ax2.plot(t, res["s"] if "s" in res else np.zeros_like(t), + color=color, linewidth=1.2) + ax3.plot(t, res["u"][:, 0] if res["u"].ndim > 1 else res["u"], + color=color, linewidth=1.2) + ax1.set_ylabel("Error e(t)") + ax1.legend(fontsize=7, ncol=2) + ax1.grid(True, alpha=0.3) + ax2.set_ylabel("Sliding var s(t)") + ax2.grid(True, alpha=0.3) + ax3.set_ylabel("Control u(t)") + ax3.set_xlabel("Time (s)") + ax3.grid(True, alpha=0.3) + fig.tight_layout() + return _save_and_return(fig, save_path) diff --git a/python/tests/test_visualize.py b/python/tests/test_visualize.py new file mode 100644 index 0000000..6d93177 --- /dev/null +++ b/python/tests/test_visualize.py @@ -0,0 +1,98 @@ +"""Tests for visualization module.""" + +import numpy as np +import pytest + +pytest.importorskip("matplotlib") + + +class TestVisualize: + def test_training_curve(self): + import matplotlib + matplotlib.use("Agg") + from opensmc.rl.visualize import training_curve + fig = training_curve( + reward_history=[1.0, 2.0, 3.0], + ise_history=[0.5, 0.3, 0.1], + eval_freq=10_000, + ) + assert fig is not None + import matplotlib.pyplot as plt + assert isinstance(fig, plt.Figure) + plt.close(fig) + + def test_surface_heatmap(self): + 
import matplotlib + matplotlib.use("Agg") + from opensmc.rl.visualize import surface_heatmap + sigma_fn = lambda e, ed: e + 2 * ed + fig = surface_heatmap(sigma_fn=sigma_fn, e_range=(-3, 3), edot_range=(-3, 3)) + assert fig is not None + import matplotlib.pyplot as plt + plt.close(fig) + + def test_radar_chart(self): + import matplotlib + matplotlib.use("Agg") + from opensmc.rl.visualize import radar_chart + scores = {"Linear": 0.8, "Terminal": 0.6, "FastTerminal": 0.9, + "NonsingularTerminal": 0.7, "IntegralSliding": 0.3, + "IntegralTerminal": 0.5, "PID": 0.4, "Global": 0.2, + "PredefinedTime": 0.1, "NonlinearDamping": 0.3} + fig = radar_chart(scores) + assert fig is not None + import matplotlib.pyplot as plt + plt.close(fig) + + def test_benchmark_bars(self): + import matplotlib + matplotlib.use("Agg") + import pandas as pd + from opensmc.rl.visualize import benchmark_bars + data = pd.DataFrame({ + "ise": [0.1, 0.2, 0.3], + }, index=["Linear", "Terminal", "RL"]) + fig = benchmark_bars(data, metric="ise") + assert fig is not None + import matplotlib.pyplot as plt + plt.close(fig) + + def test_save_path(self, tmp_path): + import matplotlib + matplotlib.use("Agg") + from opensmc.rl.visualize import training_curve + path = str(tmp_path / "test.png") + fig = training_curve( + reward_history=[1.0, 2.0], ise_history=[0.5, 0.3], + eval_freq=10_000, save_path=path, + ) + import os + assert os.path.exists(path) + import matplotlib.pyplot as plt + plt.close(fig) + + def test_contour_overlay(self): + import matplotlib + matplotlib.use("Agg") + from opensmc.rl.visualize import contour_overlay + from opensmc.surfaces import LinearSurface, FastTerminalSurface + fig = contour_overlay( + surfaces={"Linear": LinearSurface(c=10), "FastTerminal": FastTerminalSurface()}, + e_range=(-3, 3), edot_range=(-3, 3), + ) + assert fig is not None + import matplotlib.pyplot as plt + plt.close(fig) + + def test_cross_plant_radar(self): + import matplotlib + matplotlib.use("Agg") + from 
opensmc.rl.visualize import cross_plant_radar + fp = { + "double_integrator": {"Linear": 0.8, "Terminal": 0.6}, + "crane": {"Linear": 0.5, "Terminal": 0.9}, + } + fig = cross_plant_radar(fp) + assert fig is not None + import matplotlib.pyplot as plt + plt.close(fig) From ab3784f6324dabe7a884d8972e2a5398ba411620 Mon Sep 17 00:00:00 2001 From: a2z Date: Thu, 19 Mar 2026 12:00:43 +0300 Subject: [PATCH 09/10] =?UTF-8?q?feat:=20complete=20Phase=20D=20integratio?= =?UTF-8?q?n=20=E2=80=94=20public=20API,=20deps,=20examples?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - opensmc/rl/__init__.py: lazy __getattr__ for SurfaceDiscoveryEnv, trainer (train_surface/train_all_surfaces/TrainingResult), benchmark/BenchmarkResults, and visualize module - pyproject.toml: add pandas>=1.5 to rl extras (required by benchmark) - examples/train_and_fingerprint.py: PPO training + fingerprint + plots - examples/full_benchmark.py: classical controller benchmark runner Co-Authored-By: Claude Sonnet 4.6 --- python/examples/full_benchmark.py | 25 +++++++++++++++ python/examples/train_and_fingerprint.py | 41 ++++++++++++++++++++++++ python/opensmc/rl/__init__.py | 30 ++++++++++++++++- python/pyproject.toml | 2 +- 4 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 python/examples/full_benchmark.py create mode 100644 python/examples/train_and_fingerprint.py diff --git a/python/examples/full_benchmark.py b/python/examples/full_benchmark.py new file mode 100644 index 0000000..7f00356 --- /dev/null +++ b/python/examples/full_benchmark.py @@ -0,0 +1,25 @@ +"""Example: Run full benchmark — RL vs all controllers. 
+ +Usage: + cd D:/OpenSMC/python + python examples/full_benchmark.py +""" + +from opensmc.rl import benchmark, visualize + +print("Running benchmark (classical controllers only)...") +results = benchmark( + plants=["double_integrator", "inverted_pendulum"], + trained_models_dir=None, + algorithms=[], + disturbances=["none", "sinusoidal"], + T=5.0, + dt=0.01, + include_matched=True, + output_dir="benchmark_results", +) + +print("\n" + results.summary()) +print("\nLaTeX (first 20 lines):") +print("\n".join(results.to_latex().split("\n")[:20])) +print(f"\nResults saved to benchmark_results/") diff --git a/python/examples/train_and_fingerprint.py b/python/examples/train_and_fingerprint.py new file mode 100644 index 0000000..21a1358 --- /dev/null +++ b/python/examples/train_and_fingerprint.py @@ -0,0 +1,41 @@ +"""Example: Train PPO on DoubleIntegrator, fingerprint, plot radar chart. + +Usage: + cd D:/OpenSMC/python + python examples/train_and_fingerprint.py +""" + +import numpy as np +from opensmc.rl import train_surface, RLDiscoveredSurface +from opensmc.rl import fingerprinting, visualize + +print("Training PPO on DoubleIntegrator...") +result = train_surface( + plant="double_integrator", + algorithm="PPO", + disturbance="sinusoidal", + total_timesteps=50_000, + n_envs=4, + output_dir="demo_models", + seed=42, +) +print(f"Training complete in {result.training_time:.1f}s") +print(f"Final reward: {result.final_reward:.2f}, ISE: {result.final_ise:.4f}") + +surface = RLDiscoveredSurface(result.model_path, use_4d_obs=True) + +fp = fingerprinting.fingerprint(surface) +print("\nFingerprint scores:") +for name, score in sorted(fp.items(), key=lambda x: -x[1]): + print(f" {name:25s} {score:.3f}") + +visualize.training_curve( + result.reward_history, result.ise_history, + save_path="demo_models/training_curve.png" +) +visualize.surface_heatmap( + sigma_fn=lambda e, ed: surface.compute(e, ed), + save_path="demo_models/surface_heatmap.png" +) +visualize.radar_chart(fp, 
save_path="demo_models/radar.png") +print("\nFigures saved to demo_models/") diff --git a/python/opensmc/rl/__init__.py b/python/opensmc/rl/__init__.py index f97207e..8678b94 100644 --- a/python/opensmc/rl/__init__.py +++ b/python/opensmc/rl/__init__.py @@ -3,9 +3,37 @@ Provides: - RLDiscoveredSurface: wraps a trained SB3 policy as a sliding surface - fingerprinting: decompose any surface against 10 known SMC surface types +- SurfaceDiscoveryEnv: Gymnasium env for RL surface discovery +- train_surface / train_all_surfaces: PPO/SAC training pipeline +- benchmark: RL vs 17 classical controllers comparison +- visualize: publication-quality plots """ from .rl_surface import RLDiscoveredSurface from . import fingerprinting -__all__ = ['RLDiscoveredSurface', 'fingerprinting'] +__all__ = [ + 'RLDiscoveredSurface', + 'fingerprinting', + 'SurfaceDiscoveryEnv', + 'train_surface', 'train_all_surfaces', 'TrainingResult', + 'benchmark', 'BenchmarkResults', + 'visualize', +] + + +def __getattr__(name): + """Lazy imports for optional heavy dependencies.""" + if name == "SurfaceDiscoveryEnv": + from .discovery_env import SurfaceDiscoveryEnv + return SurfaceDiscoveryEnv + if name in ("train_surface", "train_all_surfaces", "TrainingResult"): + from . import trainer + return getattr(trainer, name) + if name in ("benchmark", "BenchmarkResults"): + from . import benchmark as bm + return getattr(bm, name) + if name == "visualize": + from . 
import visualize + return visualize + raise AttributeError(f"module 'opensmc.rl' has no attribute {name!r}") diff --git a/python/pyproject.toml b/python/pyproject.toml index a822f83..aea5f76 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ keywords = ["sliding-mode-control", "SMC", "control-systems", "robotics", "HOSMC"] [project.optional-dependencies] -rl = ["gymnasium>=0.29", "stable-baselines3>=2.0", "torch>=2.0"] +rl = ["gymnasium>=0.29", "stable-baselines3>=2.0", "torch>=2.0", "pandas>=1.5"] viz = ["matplotlib>=3.5"] all = ["opensmc[rl,viz]"] dev = ["pytest>=7.0", "scipy>=1.9", "opensmc[all]"] From 9e615f9fb37ba83f1536a9bdc1f8651e01730fc3 Mon Sep 17 00:00:00 2001 From: a2z Date: Thu, 19 Mar 2026 12:09:43 +0300 Subject: [PATCH 10/10] fix: replace lazy __getattr__ imports with conditional eager imports The __getattr__ pattern caused infinite recursion when attribute names matched submodule names (benchmark, visualize). Switched to try/except conditional imports. Renamed benchmark() to run_benchmark() in public API. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- python/examples/full_benchmark.py | 4 +-- python/opensmc/rl/__init__.py | 45 +++++++++++++++++-------------- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/python/examples/full_benchmark.py b/python/examples/full_benchmark.py index 7f00356..73f6b22 100644 --- a/python/examples/full_benchmark.py +++ b/python/examples/full_benchmark.py @@ -5,10 +5,10 @@ python examples/full_benchmark.py """ -from opensmc.rl import benchmark, visualize +from opensmc.rl import run_benchmark, visualize print("Running benchmark (classical controllers only)...") -results = benchmark( +results = run_benchmark( plants=["double_integrator", "inverted_pendulum"], trained_models_dir=None, algorithms=[], diff --git a/python/opensmc/rl/__init__.py b/python/opensmc/rl/__init__.py index 8678b94..41e497d 100644 --- a/python/opensmc/rl/__init__.py +++ b/python/opensmc/rl/__init__.py @@ -5,7 +5,7 @@ - fingerprinting: decompose any surface against 10 known SMC surface types - SurfaceDiscoveryEnv: Gymnasium env for RL surface discovery - train_surface / train_all_surfaces: PPO/SAC training pipeline -- benchmark: RL vs 17 classical controllers comparison +- run_benchmark / BenchmarkResults: RL vs 17 classical controllers comparison - visualize: publication-quality plots """ @@ -15,25 +15,30 @@ __all__ = [ 'RLDiscoveredSurface', 'fingerprinting', - 'SurfaceDiscoveryEnv', - 'train_surface', 'train_all_surfaces', 'TrainingResult', - 'benchmark', 'BenchmarkResults', - 'visualize', ] +# Conditionally import modules that need optional dependencies. +# These fail gracefully if gymnasium/SB3/pandas/matplotlib aren't installed. 
+try: + from .discovery_env import SurfaceDiscoveryEnv + __all__.append('SurfaceDiscoveryEnv') +except ImportError: + pass -def __getattr__(name): - """Lazy imports for optional heavy dependencies.""" - if name == "SurfaceDiscoveryEnv": - from .discovery_env import SurfaceDiscoveryEnv - return SurfaceDiscoveryEnv - if name in ("train_surface", "train_all_surfaces", "TrainingResult"): - from . import trainer - return getattr(trainer, name) - if name in ("benchmark", "BenchmarkResults"): - from . import benchmark as bm - return getattr(bm, name) - if name == "visualize": - from . import visualize - return visualize - raise AttributeError(f"module 'opensmc.rl' has no attribute {name!r}") +try: + from .trainer import train_surface, train_all_surfaces, TrainingResult + __all__.extend(['train_surface', 'train_all_surfaces', 'TrainingResult']) +except ImportError: + pass + +try: + from .benchmark import benchmark as run_benchmark, BenchmarkResults + __all__.extend(['run_benchmark', 'BenchmarkResults']) +except ImportError: + pass + +try: + from . import visualize + __all__.append('visualize') +except ImportError: + pass