ADGEfficiency · ADGEfficiency · May 10, 2025 · May 8, 2025 · May 8, 2025 · May 8, 2025
diff --git a/examples/battery.py b/examples/battery.py
@@ -3,22 +3,24 @@
 
 import energypy
 
-env = energypy.make_env(electricity_prices=np.random.uniform(-1000, 1000, 2048 * 10))
+env = energypy.make_env(electricity_prices=np.random.uniform(-1000, 1000, 1024 * 5))
 config_random = energypy.ExperimentConfig(
     env_tr=env,
     agent=PPO(
         policy="MlpPolicy",
         env=env,
         learning_rate=0.0003,
-        n_steps=2048,
+        n_steps=1024,
         batch_size=64,
         n_epochs=2,
         gamma=0.99,
         gae_lambda=0.95,
         clip_range=0.2,
+        policy_kwargs=dict(net_arch=[64, 64]),
         verbose=1,
     ),
     name="battery_random",
+    n_eval_episodes=5,
 )
 
 result = energypy.run_experiment(cfg=config_random)

diff --git a/examples/battery_arbitrage_experiments.py b/examples/battery_arbitrage_experiments.py
@@ -1,4 +1,4 @@
-import pathlib
+import collections
 import uuid
 
 import numpy as np
@@ -8,31 +8,47 @@
 import energypy
 from energypy.dataset import load_electricity_prices
 
-data = load_electricity_prices(
-    data_dir=pathlib.Path("data"), download_if_missing=True, verbose=True
+data = load_electricity_prices()
+
+n_lags = 0
+n_horizons = 12
+
+data = data.with_columns(
+    [pl.col("price").shift(n).alias(f"lag-{n}") for n in range(n_lags, n_lags + 1)]
 )
-prices = data["price"]
-features = prices.clone().to_frame()
-features = features.with_columns(
-    [pl.col("price").shift(n).alias(f"lag-{n}") for n in range(48)]
+data = data.with_columns(
+    [
+        pl.col("price").shift(-1 * n).alias(f"horizon-{n}")
+        for n in range(1, n_horizons + 1)
+    ]
 )
-features = features.drop_nulls()
+data = data.drop_nulls()
+
+prices = data["price"].to_numpy()
+features = data.select(
+    pl.selectors.starts_with("horizon-"), pl.selectors.starts_with("lag-")
+).to_numpy()
+
+te_tr_split_idx = int(data.shape[0] * 0.8)
 
-split_idx = int(data.shape[0] // 2)
-prices_tr = prices.slice(0, split_idx)
-prices_te = prices.slice(split_idx, data.shape[0])
+prices_tr = prices[0:te_tr_split_idx]
+features_tr = features[0:te_tr_split_idx]
 
-features_tr = features.slice(0, split_idx)
-features_te = features.slice(split_idx, data.shape[0])
+prices_te = prices[te_tr_split_idx:]
+features_te = features[te_tr_split_idx:]
+
+print(f"prices_tr: {prices_tr.shape} features_tr: {features_tr.shape}")
+print(f"prices_te: {prices_te.shape} features_te: {features_te.shape}")
 
 expt_guid = uuid.uuid4()
 configs = []
-for noise in [0, 1, 10, 100, 1000]:
+noise = [0.0, 0.1, 0.5, 0.75, 1, 5, 25, 100, 1000]
+for noise_var in noise:
     run_guid = uuid.uuid4()
-    env_tr = energypy.make_env(electricity_prices=prices_tr, features=features)
+    env_tr = energypy.make_env(electricity_prices=prices_tr, features=features_tr)
     env_te = energypy.make_env(
         electricity_prices=prices_te,
-        features=prices_te * np.random.normal(0, noise, size=prices_te.shape[0]),
+        features=features_te * np.random.normal(0, noise_var, size=features_te.shape),
     )
 
     config = energypy.ExperimentConfig(
@@ -42,35 +58,30 @@
             policy="MlpPolicy",
             env=env_tr,
             learning_rate=0.0003,
-            n_steps=2048,
+            n_steps=1024,
             batch_size=64,
             n_epochs=2,
             gamma=0.99,
             gae_lambda=0.95,
             clip_range=0.2,
-            verbose=1,
+            policy_kwargs=dict(net_arch=[64, 64]),
+            verbose=0,
             tensorboard_log=f"./data/tensorboard/battery_arbitrage_experiments/{expt_guid}/run/{run_guid}",
         ),
-        name=f"battery_noise_{noise}",
-        n_learning_steps=5000,  # Short training for demonstration
-        n_eval_episodes=25,
+        name=f"battery_noise_{noise_var}",
+        n_learning_steps=5000,
+        n_eval_episodes=30,
     )
     configs.append(config)
 
 results = energypy.run_experiments(
     configs, log_dir=f"./data/tensorboard/battery_arbitrage_experiments/{expt_guid}"
 )
 
-best_idx = np.argmax([r.checkpoints[-1].mean_reward_te for r in results])
-best_config = configs[best_idx]
-best_result = results[best_idx].checkpoints[-1]
+expt = collections.defaultdict(list)
+for noise_var, result in zip(noise, results):
+    cp = result.checkpoints[-1]
+    expt["noise_var"].append(noise_var)
+    expt["mean_reward_te"].append(cp.mean_reward_te)
 
-print(f"Best configuration: {best_config.name}")
-print(f"Learning rate: {best_config.agent.learning_rate}")
-print(f"Gamma: {best_config.agent.gamma}")
-print(
-    f"Test reward: {best_result.mean_reward_te:.2f} ± {best_result.std_reward_te:.2f}"
-)
-print(
-    f"Train reward: {best_result.mean_reward_tr:.2f} ± {best_result.std_reward_tr:.2f}"
-)
+print(pl.DataFrame(expt))
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,6 +19,7 @@ build-backend = "hatchling.build"
 
 [dependency-groups]
 dev = [
+    "ipython>=9.2.0",
     "isort>=6.0.1",
     "mlflow>=2.21.3",
     "polars>=1.27.1",

diff --git a/src/energypy/__init__.py b/src/energypy/__init__.py
@@ -1,6 +1,9 @@
 """Reinforcement learning experiments with energy environments with energypy."""
 
 import gymnasium as gym
+import numpy as np
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.vec_env import DummyVecEnv
 
 from energypy.battery import Battery
 from energypy.experiment import ExperimentConfig, run_experiment, run_experiments
@@ -12,11 +15,38 @@
 
 
 def make_env(electricity_prices, features=None):
+    """
+    Create a battery environment with electricity prices and optional features.
+
+    Args:
+        electricity_prices: A sequence of electricity prices
+        features: Optional features array with same length as prices.
+                 If None, uses electricity_prices reshaped as features.
+
+    Returns:
+        A DummyVecEnv holding the monitored, reward-normalized battery env
+    """
+    # If features is None, use the electricity prices as features
+    if features is None:
+        # Reshape prices to make it a 2D array with shape (n, 1)
+        prices_array = np.array(electricity_prices)
+        features = prices_array.reshape(-1, 1)
+
     env = gym.make(
         "energypy/battery", electricity_prices=electricity_prices, features=features
     )
     env = gym.wrappers.NormalizeReward(env)
-    return env
+    env = Monitor(env, filename="./data/data.log")
+    # typing.cast below only informs the type checker; it has no runtime effect
+    from typing import Any, cast
+
+    from gymnasium import Env
+
+    # DummyVecEnv takes a list of callables; this one returns the env built above
+    def env_fn():
+        return cast(Env[Any, Any], env)
+    vec_env = DummyVecEnv([env_fn])
+    return vec_env
 
 
 __all__ = [

diff --git a/src/energypy/battery.py b/src/energypy/battery.py
@@ -6,25 +6,37 @@
 import numpy as np
 from numpy.typing import NDArray
 
-NumericSequence = NDArray[np.float64] | typing.Sequence[float]
+
+# Define a Protocol for objects that have a shape attribute
+class HasShape(typing.Protocol):
+    shape: typing.Any
+
+
+# Use Union with explicit types to ensure proper type checking
+NumericSequence = typing.Union[NDArray[np.float64], typing.Sequence[float]]
 
 
 class Battery(gym.Env[NDArray[np.float64], NDArray[np.float64]]):
     def __init__(
         self,
         electricity_prices: NumericSequence = np.random.uniform(-100.0, 100, 48 * 10),
-        features: NumericSequence = np.random.uniform(-100.0, 100, (48 * 10, 4)),
-        power_mw=2.0,
-        capacity_mwh=4.0,
-        efficiency_pct=0.9,
+        features: NDArray[np.float64] = np.random.uniform(-100.0, 100, (48 * 10, 4)),
+        power_mw: float = 2.0,
+        capacity_mwh: float = 4.0,
+        efficiency_pct: float = 0.9,
         initial_state_of_charge_mwh: float = 0.0,
         episode_length: int = 48,
     ):
         self.power_mw = power_mw
         self.capacity_mwh = capacity_mwh
-        self.efficiency_pct: float = efficiency_pct
-        self.electricity_prices: NumericSequence = electricity_prices
-        # TODO - USE FEATURES!!!
+        self.efficiency_pct = efficiency_pct
+        self.electricity_prices = electricity_prices
+        self.features = features
+
+        assert len(self.electricity_prices) == features.shape[0], (
+            "Features and prices must have same length"
+        )
+        self.n_features = features.shape[1]
 
         self.episode_length: int = episode_length
         self.index: int = 0
@@ -35,13 +47,15 @@ def __init__(
         self.state_of_charge_mwh: float = initial_state_of_charge_mwh
         assert self.episode_length + self.n_lags <= len(self.electricity_prices)
 
-        # lagged prices and current state of charge
+        # Observation space includes features and current state of charge
         self.observation_space: gym.spaces.Space[NDArray[np.float64]] = gym.spaces.Box(
-            low=-1000, high=1000, shape=(self.n_lags + self.n_horizons + 1,)
+            low=-1000, high=1000, shape=(self.n_features + 1,), dtype=np.float64
         )
 
         # one action - choose charge / discharge MW for the next interval
-        self.action_space = gym.spaces.Box(low=-power_mw, high=power_mw)
+        self.action_space = gym.spaces.Box(
+            low=-power_mw, high=power_mw, shape=(1,), dtype=np.float32
+        )
 
         self.info: dict[str, list[float]] = collections.defaultdict(list)
 
@@ -62,18 +76,14 @@ def reset(
         return self._get_obs(), self._get_info()
 
     def _get_obs(self) -> NDArray[np.float64]:
-        # TODO - use internal state counter, price data
-        # prices with charges stacked on the end
-        obs = list(
-            self.electricity_prices[
-                self.index - self.n_lags : self.index + self.n_horizons
-            ]
-        ) + [self.state_of_charge_mwh]
-        obs = np.array(obs, dtype=np.float64)
-        return obs
+        # Get features for the current time step
+        feature_obs = self.features[self.index].tolist()
+        # Add state of charge to observation
+        obs = feature_obs + [self.state_of_charge_mwh]
+        return np.array(obs, dtype=np.float64)
 
     def _get_info(self) -> dict[str, list[float]]:
-        # TODO - some info for experiment analysis (usually)
+        # Returns the shared info dict as-is; nothing is added to it here
         return self.info
 
     def step(
@@ -105,7 +115,7 @@ def step(
             losses=losses,
         )
 
-        # TODO import & export prices
+        # Reward is the current price times the battery power for this step
         reward = float(self.electricity_prices[self.index] * battery_power_mw)
         terminated = self.episode_step + 1 == self.episode_length
         truncated = False

diff --git a/src/energypy/dataset.py b/src/energypy/dataset.py
@@ -29,8 +29,6 @@ def download_electricity_prices(
 
     # If the final file already exists, return its path
     if final_file.exists():
-        if verbose:
-            print(f"Found existing data at {final_file}")
         return final_file
 
     # Generate dates and URLs
@@ -93,15 +91,13 @@ def download_electricity_prices(
 
 def load_electricity_prices(
     data_dir: pathlib.Path = pathlib.Path("data"),
-    download_if_missing: bool = True,
-    verbose: bool = False,
+    verbose: bool = True,
 ) -> pl.DataFrame:
     """
     Load electricity price data, downloading if necessary.
 
     Args:
         data_dir: Directory where data is stored
-        download_if_missing: Whether to download data if not found
         verbose: Whether to print progress information
 
     Returns:
@@ -110,14 +106,8 @@ def load_electricity_prices(
     final_file = data_dir / "final.parquet"
 
     if not final_file.exists():
-        if download_if_missing:
-            if verbose:
-                print("Data file not found. Downloading...")
-            download_electricity_prices(data_dir=data_dir, verbose=verbose)
-        else:
-            raise FileNotFoundError(
-                f"Data file not found at {final_file} and download_if_missing=False"
-            )
+        print("Data file not found. Downloading...")
+        download_electricity_prices(data_dir=data_dir, verbose=verbose)
 
     data = pl.read_parquet(final_file)
     data = data.select(