Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions examples/battery.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,24 @@

import energypy

env = energypy.make_env(electricity_prices=np.random.uniform(-1000, 1000, 2048 * 10))
env = energypy.make_env(electricity_prices=np.random.uniform(-1000, 1000, 1024 * 5))
config_random = energypy.ExperimentConfig(
env_tr=env,
agent=PPO(
policy="MlpPolicy",
env=env,
learning_rate=0.0003,
n_steps=2048,
n_steps=1024,
batch_size=64,
n_epochs=2,
gamma=0.99,
gae_lambda=0.95,
clip_range=0.2,
policy_kwargs=dict(net_arch=[64, 64]),
verbose=1,
),
name="battery_random",
n_eval_episodes=5,
)

result = energypy.run_experiment(cfg=config_random)
Expand Down
77 changes: 44 additions & 33 deletions examples/battery_arbitrage_experiments.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import pathlib
import collections
import uuid

import numpy as np
Expand All @@ -8,31 +8,47 @@
import energypy
from energypy.dataset import load_electricity_prices

data = load_electricity_prices(
data_dir=pathlib.Path("data"), download_if_missing=True, verbose=True
data = load_electricity_prices()

n_lags = 0
n_horizons = 12

data = data.with_columns(
[pl.col("price").shift(n).alias(f"lag-{n}") for n in range(n_lags, n_lags + 1)]
)
prices = data["price"]
features = prices.clone().to_frame()
features = features.with_columns(
[pl.col("price").shift(n).alias(f"lag-{n}") for n in range(48)]
data = data.with_columns(
[
pl.col("price").shift(-1 * n).alias(f"horizon-{n}")
for n in range(1, n_horizons + 1)
]
)
features = features.drop_nulls()
data = data.drop_nulls()

prices = data["price"].to_numpy()
features = data.select(
pl.selectors.starts_with("horizon-"), pl.selectors.starts_with("lag-")
).to_numpy()

te_tr_split_idx = int(data.shape[0] * 0.8)

split_idx = int(data.shape[0] // 2)
prices_tr = prices.slice(0, split_idx)
prices_te = prices.slice(split_idx, data.shape[0])
prices_tr = prices[0:te_tr_split_idx]
features_tr = features[0:te_tr_split_idx]

features_tr = features.slice(0, split_idx)
features_te = features.slice(split_idx, data.shape[0])
prices_te = prices[te_tr_split_idx:]
features_te = features[te_tr_split_idx:]

print(f"prices_tr: {prices_tr.shape} features_tr: {features_tr.shape}")
print(f"prices_te: {prices_te.shape} features_te: {features_te.shape}")

expt_guid = uuid.uuid4()
configs = []
for noise in [0, 1, 10, 100, 1000]:
noise = [0.0, 0.1, 0.5, 0.75, 1, 5, 25, 100, 1000]
for noise_var in noise:
run_guid = uuid.uuid4()
env_tr = energypy.make_env(electricity_prices=prices_tr, features=features)
env_tr = energypy.make_env(electricity_prices=prices_tr, features=features_tr)
env_te = energypy.make_env(
electricity_prices=prices_te,
features=prices_te * np.random.normal(0, noise, size=prices_te.shape[0]),
features=features_te * np.random.normal(0, noise_var, size=features_te.shape),
)

config = energypy.ExperimentConfig(
Expand All @@ -42,35 +58,30 @@
policy="MlpPolicy",
env=env_tr,
learning_rate=0.0003,
n_steps=2048,
n_steps=1024,
batch_size=64,
n_epochs=2,
gamma=0.99,
gae_lambda=0.95,
clip_range=0.2,
verbose=1,
policy_kwargs=dict(net_arch=[64, 64]),
verbose=0,
tensorboard_log=f"./data/tensorboard/battery_arbitrage_experiments/{expt_guid}/run/{run_guid}",
),
name=f"battery_noise_{noise}",
n_learning_steps=5000, # Short training for demonstration
n_eval_episodes=25,
name=f"battery_noise_{noise_var}",
n_learning_steps=5000,
n_eval_episodes=30,
)
configs.append(config)

results = energypy.run_experiments(
configs, log_dir=f"./data/tensorboard/battery_arbitrage_experiments/{expt_guid}"
)

best_idx = np.argmax([r.checkpoints[-1].mean_reward_te for r in results])
best_config = configs[best_idx]
best_result = results[best_idx].checkpoints[-1]
expt = collections.defaultdict(list)
for noise_var, result in zip(noise, results):
cp = result.checkpoints[-1]
expt["noise_var"].append(noise_var)
expt["mean_reward_te"].append(cp.mean_reward_te)

print(f"Best configuration: {best_config.name}")
print(f"Learning rate: {best_config.agent.learning_rate}")
print(f"Gamma: {best_config.agent.gamma}")
print(
f"Test reward: {best_result.mean_reward_te:.2f} ± {best_result.std_reward_te:.2f}"
)
print(
f"Train reward: {best_result.mean_reward_tr:.2f} ± {best_result.std_reward_tr:.2f}"
)
print(pl.DataFrame(expt))
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ build-backend = "hatchling.build"

[dependency-groups]
dev = [
"ipython>=9.2.0",
"isort>=6.0.1",
"mlflow>=2.21.3",
"polars>=1.27.1",
Expand Down
32 changes: 31 additions & 1 deletion src/energypy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Reinforcement learning experiments with energy environments with energypy."""

import gymnasium as gym
import numpy as np
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

from energypy.battery import Battery
from energypy.experiment import ExperimentConfig, run_experiment, run_experiments
Expand All @@ -12,11 +15,38 @@


def make_env(electricity_prices, features=None):
    """
    Create a vectorized battery environment from prices and optional features.

    The environment is created with ``gym.make("energypy/battery", ...)`` and
    then wrapped, in order, with ``NormalizeReward``, ``Monitor`` (constructed
    with ``filename="./data/data.log"``) and a single-environment
    ``DummyVecEnv``.

    Args:
        electricity_prices: A sequence of electricity prices.
        features: Optional features array with the same length as the prices.
            If None, the prices reshaped to ``(n, 1)`` are used as features.

    Returns:
        A ``DummyVecEnv`` containing the one wrapped battery environment.
    """
    # Function-scope imports: only needed for the type-checker cast below.
    from typing import Any, cast

    from gymnasium import Env

    if features is None:
        # Fall back to the prices themselves as a single feature column.
        features = np.array(electricity_prices).reshape(-1, 1)

    env = gym.make(
        "energypy/battery", electricity_prices=electricity_prices, features=features
    )
    env = gym.wrappers.NormalizeReward(env)
    env = Monitor(env, filename="./data/data.log")

    def env_fn():
        # DummyVecEnv takes factories; this one hands back the env built above.
        return cast(Env[Any, Any], env)

    return DummyVecEnv([env_fn])


__all__ = [
Expand Down
54 changes: 32 additions & 22 deletions src/energypy/battery.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,37 @@
import numpy as np
from numpy.typing import NDArray

NumericSequence = NDArray[np.float64] | typing.Sequence[float]

# Define a Protocol for objects that have a shape attribute
class HasShape(typing.Protocol):
    """Structural type for any object exposing a ``shape`` attribute."""

    shape: typing.Any


# Use Union with explicit types to ensure proper type checking
NumericSequence = typing.Union[NDArray[np.float64], typing.Sequence[float]]


class Battery(gym.Env[NDArray[np.float64], NDArray[np.float64]]):
def __init__(
self,
electricity_prices: NumericSequence = np.random.uniform(-100.0, 100, 48 * 10),
features: NumericSequence = np.random.uniform(-100.0, 100, (48 * 10, 4)),
power_mw=2.0,
capacity_mwh=4.0,
efficiency_pct=0.9,
features: NDArray[np.float64] = np.random.uniform(-100.0, 100, (48 * 10, 4)),
power_mw: float = 2.0,
capacity_mwh: float = 4.0,
efficiency_pct: float = 0.9,
initial_state_of_charge_mwh: float = 0.0,
episode_length: int = 48,
):
self.power_mw = power_mw
self.capacity_mwh = capacity_mwh
self.efficiency_pct: float = efficiency_pct
self.electricity_prices: NumericSequence = electricity_prices
# TODO - USE FEATURES!!!
self.efficiency_pct = efficiency_pct
self.electricity_prices = electricity_prices
self.features = features

assert len(self.electricity_prices) == features.shape[0], (
"Features and prices must have same length"
)
self.n_features = features.shape[1]

self.episode_length: int = episode_length
self.index: int = 0
Expand All @@ -35,13 +47,15 @@ def __init__(
self.state_of_charge_mwh: float = initial_state_of_charge_mwh
assert self.episode_length + self.n_lags <= len(self.electricity_prices)

# lagged prices and current state of charge
# Observation space includes features and current state of charge
self.observation_space: gym.spaces.Space[NDArray[np.float64]] = gym.spaces.Box(
low=-1000, high=1000, shape=(self.n_lags + self.n_horizons + 1,)
low=-1000, high=1000, shape=(self.n_features + 1,), dtype=np.float64
)

# one action - choose charge / discharge MW for the next interval
self.action_space = gym.spaces.Box(low=-power_mw, high=power_mw)
self.action_space = gym.spaces.Box(
low=-power_mw, high=power_mw, shape=(1,), dtype=np.float32
)

self.info: dict[str, list[float]] = collections.defaultdict(list)

Expand All @@ -62,18 +76,14 @@ def reset(
return self._get_obs(), self._get_info()

def _get_obs(self) -> NDArray[np.float64]:
# TODO - use internal state counter, price data
# prices with charges stacked on the end
obs = list(
self.electricity_prices[
self.index - self.n_lags : self.index + self.n_horizons
]
) + [self.state_of_charge_mwh]
obs = np.array(obs, dtype=np.float64)
return obs
# Get features for the current time step
feature_obs = self.features[self.index].tolist()
# Add state of charge to observation
obs = feature_obs + [self.state_of_charge_mwh]
return np.array(obs, dtype=np.float64)

def _get_info(self) -> dict[str, list[float]]:
    """Return the ``self.info`` mapping as-is (the same object, not a copy)."""
    # NOTE(review): nothing is added to the mapping here; it is only handed back.
    return self.info

def step(
Expand Down Expand Up @@ -105,7 +115,7 @@ def step(
losses=losses,
)

# TODO import & export prices
# Calculate reward using price
reward = float(self.electricity_prices[self.index] * battery_power_mw)
terminated = self.episode_step + 1 == self.episode_length
truncated = False
Expand Down
16 changes: 3 additions & 13 deletions src/energypy/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ def download_electricity_prices(

# If the final file already exists, return its path
if final_file.exists():
if verbose:
print(f"Found existing data at {final_file}")
return final_file

# Generate dates and URLs
Expand Down Expand Up @@ -93,15 +91,13 @@ def download_electricity_prices(

def load_electricity_prices(
data_dir: pathlib.Path = pathlib.Path("data"),
download_if_missing: bool = True,
verbose: bool = False,
verbose: bool = True,
) -> pl.DataFrame:
"""
Load electricity price data, downloading if necessary.

Args:
data_dir: Directory where data is stored
download_if_missing: Whether to download data if not found
verbose: Whether to print progress information

Returns:
Expand All @@ -110,14 +106,8 @@ def load_electricity_prices(
final_file = data_dir / "final.parquet"

if not final_file.exists():
if download_if_missing:
if verbose:
print("Data file not found. Downloading...")
download_electricity_prices(data_dir=data_dir, verbose=verbose)
else:
raise FileNotFoundError(
f"Data file not found at {final_file} and download_if_missing=False"
)
print("Data file not found. Downloading...")
download_electricity_prices(data_dir=data_dir, verbose=verbose)

data = pl.read_parquet(final_file)
data = data.select(
Expand Down
Loading