Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
bb23237
feat: add reinforcement learning directory. The purpose of this direc…
gabriel-trigo Mar 9, 2025
f8ac4de
Merge branch 'reinforcement_learning-gabriel'
gabriel-trigo Mar 10, 2025
72241eb
chore: update .gitignore to ignore experiment results, replay buffer…
gabriel-trigo Mar 23, 2025
7ec2f9d
build: add tqdm to project dependencies. Will be used to better monit…
gabriel-trigo Mar 23, 2025
989a423
feat: add generate_gin_config_files.py script, which takes in a base …
gabriel-trigo Mar 23, 2025
5c619d2
feat: add ddpg agent implementation to agents directory
gabriel-trigo Mar 23, 2025
e55bd0a
feat: add td3 implementation to agents directory
gabriel-trigo Mar 23, 2025
75232f6
feat: improve train.py script. Added support for td3 and ddpg impleme…
gabriel-trigo Mar 25, 2025
812565f
feat: add observer that records and saves trajectories. Also added a …
gabriel-trigo Mar 25, 2025
5694dcb
fix: minor bug in print_status_observer.py. Was using the total numbe…
gabriel-trigo Mar 25, 2025
6a92d8d
feat: add eval.py script, which is used to evaluate a trained policy.…
gabriel-trigo Mar 25, 2025
c0b5679
docs: add example bash script to run the populate_starter_buffer script
gabriel-trigo Mar 25, 2025
7f8d416
chore: update .gitignore
gabriel-trigo Mar 25, 2025
111b6e2
fix: get rid of step_interval parameter in environment.py (this param…
gabriel-trigo Mar 27, 2025
4e0dd09
fix: make eval.py script use the latest learned policy checkpoint (be…
gabriel-trigo Mar 27, 2025
c15c274
tests: fix environment.py tests that were failing to conform to the c…
gabriel-trigo Mar 27, 2025
f831a1b
fix: change learning rate of td3 algorithm
gabriel-trigo Apr 11, 2025
e698c6d
Revert "fix: change learning rate of td3 algorithm"
gabriel-trigo Apr 11, 2025
0bf3927
reinforcement learning 2, merged with changes to main
gabriel-trigo May 30, 2025
c1b4322
fix: fix merge conflicts I forgot
gabriel-trigo May 30, 2025
1712eb0
fix: forgot to add resolution of merge conflicts to previous commit. …
gabriel-trigo May 31, 2025
420aad0
chore: update gitignore
gabriel-trigo May 31, 2025
18fbf48
fix: remove tqdm from isort dependencies in project file
gabriel-trigo May 31, 2025
9f4b4a2
style: styling changes from isort
gabriel-trigo May 31, 2025
a802100
fix: fix merge conflicts
gabriel-trigo Jun 4, 2025
68954e4
Merge branch 'copybara_push' into reinforcement_learning2-gabriel
s2t2 Jun 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ __pycache__/
sb1.zip
sb1/

# jupyter notebook checkpoints:
smart_control/notebooks/.ipynb_checkpoints/

# documentation site:
docs_site

# results files:
*/**/output_data/
*/**/metrics/
Expand All @@ -25,9 +31,11 @@ smart_control/learning/
smart_control/simulator/videos
smart_control/refactor/data/
smart_control/refactor/experiment_results/

# jupyter notebook checkpoints:
smart_control/notebooks/.ipynb_checkpoints/

# documentation site:
docs_site
smart_control/old/
smart_control/configs/resources/sb1/generated_configs/
smart_control/reinforcement_learning/data/
smart_control/reinforcement_learning/experiment_results/
smart_control/reinforcement_learning/eval_results/
smart_control/reinforcement_learning/test.py
smart_control/reinforcement_learning/test.ipynb
smart_control/reinforcement_learning/plots/
23 changes: 23 additions & 0 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ single_line_exclusions = ['typing']
known_first_party = ["smart_control"]
skip_glob = ['smart_control/proto/*']


Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might need to re-add tqdm to the toml.

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
16 changes: 11 additions & 5 deletions smart_control/environment/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@

import collections
import copy
import logging as log
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's prefer to remove the alias unless there is a namespace collision

import os
import time
from typing import Final, Mapping, NewType, Optional, Sequence, Tuple
from typing import Final, Mapping, NewType, Optional, Sequence, Tuple, Union

from absl import logging
import bidict
import gin
import numpy as np
Expand Down Expand Up @@ -86,8 +86,13 @@
DeviceActionTuple = Tuple[DeviceCode, Setpoint]
DeviceMeasurementTuple = Tuple[DeviceCode, MeasurementName]

logger = log.getLogger(__name__)

def all_actions_accepted(action_response: ActionResponse) -> bool:
logger = log.getLogger(__name__)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this a duplicate logger from line 84?


def all_actions_accepted(
action_response: smart_control_building_pb2.ActionResponse,
) -> bool:
"""Returns true if all single action requests have response code ACCEPTED."""

return all(
Expand Down Expand Up @@ -374,7 +379,6 @@ def __init__(
image_generator: (
building_image_generator.BuildingImageGenerator | None
) = None,
step_interval: pd.Timedelta = pd.Timedelta(5, unit="minutes"),
writer_factory: writer_lib.BaseWriterFactory | None = None,
) -> None:
"""Environment constructor.
Expand Down Expand Up @@ -427,10 +431,12 @@ def __init__(
self._end_timestamp: pd.Timestamp = self._start_timestamp + pd.Timedelta(
num_days_in_episode, unit="days"
)
self._step_interval = step_interval
self._step_interval = pd.Timedelta(self.building.time_step_sec, unit="s")
logger.info("Step Interval: %s", self._step_interval)
self._num_timesteps_in_episode = int(
(self._end_timestamp - self._start_timestamp) / self._step_interval
)
logger.info("Num Timesteps in Episode: %s", self._num_timesteps_in_episode)
self._metrics = plot_utils.init_metrics()
logging.info(
"Episode starts at %s and ends at %s; % d timesteps.",
Expand Down
3 changes: 0 additions & 3 deletions smart_control/environment/environment_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,15 +734,13 @@ def __init__(
obs_normalizer,
action_config,
discount_factor: float = 1,
step_interval: pd.Timedelta = pd.Timedelta(1, unit="minute"),
):
super().__init__(
building,
reward_function,
obs_normalizer,
action_config,
discount_factor,
step_interval=step_interval,
)
self.counter = 0

Expand All @@ -762,7 +760,6 @@ def _step(self, action) -> ts.TimeStep:
reward_function,
obs_normalizer,
action_config,
step_interval=step_interval,
)

utils.validate_py_environment(env, episodes=5)
Expand Down
143 changes: 143 additions & 0 deletions smart_control/reinforcement_learning/agents/ddpg_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""DDPG Agent implementation.

This module provides a function to create a DDPG agent with customizable parameters.
"""

from typing import Optional, Sequence

import tensorflow as tf
from tf_agents.agents import tf_agent
from tf_agents.agents.ddpg import ddpg_agent
from tf_agents.networks import network
from tf_agents.typing import types

from smart_control.reinforcement_learning.agents.networks.ddpg_networks import create_sequential_actor_network
from smart_control.reinforcement_learning.agents.networks.ddpg_networks import create_sequential_critic_network


def create_ddpg_agent(
    time_step_spec: types.TimeStep,
    action_spec: types.NestedTensorSpec,

    # Actor network parameters
    actor_fc_layers: Sequence[int] = (128, 128),
    actor_network: Optional[network.Network] = None,

    # Critic network parameters
    critic_obs_fc_layers: Sequence[int] = (128, 64),
    critic_action_fc_layers: Sequence[int] = (128, 64),
    critic_joint_fc_layers: Sequence[int] = (128, 64),
    critic_network: Optional[network.Network] = None,

    # Optimizer parameters
    actor_learning_rate: float = 3e-4,
    critic_learning_rate: float = 3e-4,

    # Agent parameters
    ou_stddev: float = 1.0,
    ou_damping: float = 1.0,
    gamma: float = 0.99,
    target_update_tau: float = 0.005,
    target_update_period: int = 1,
    reward_scale_factor: float = 1.0,

    # Training parameters
    gradient_clipping: Optional[float] = None,
    debug_summaries: bool = False,
    summarize_grads_and_vars: bool = False,
    train_step_counter: Optional[tf.Variable] = None,
) -> tf_agent.TFAgent:
    """Creates and initializes a DDPG Agent.

    Builds default sequential actor/critic networks when custom ones are not
    supplied, then constructs a `ddpg_agent.DdpgAgent` with Adam optimizers
    and a squared-difference TD-error loss.

    Args:
      time_step_spec: A `TimeStep` spec of the expected time_steps.
      action_spec: A nest of BoundedTensorSpec representing the actions.
      actor_fc_layers: Fully connected layer units for the actor network.
      actor_network: Optional custom actor network to use.
      critic_obs_fc_layers: Fully connected layer units for the critic
        observation network.
      critic_action_fc_layers: Fully connected layer units for the critic
        action network.
      critic_joint_fc_layers: Fully connected layer units for the joint part
        of the critic network.
      critic_network: Optional custom critic network to use.
      actor_learning_rate: Actor network learning rate.
      critic_learning_rate: Critic network learning rate.
      ou_stddev: Standard deviation for the Ornstein-Uhlenbeck (OU) noise
        added for exploration.
      ou_damping: Damping factor for the OU noise.
      gamma: Discount factor for future rewards.
      target_update_tau: Factor for soft update of target networks.
      target_update_period: Period for soft update of target networks.
      reward_scale_factor: Multiplicative scale for the reward.
      gradient_clipping: Norm length to clip gradients.
      debug_summaries: Whether to emit debug summaries.
      summarize_grads_and_vars: Whether to summarize gradients and variables.
      train_step_counter: An optional counter to increment every time the
        train op is run. Defaults to the global_step.

    Returns:
      An initialized `tf_agent.TFAgent` instance wrapping the DDPG agent.
    """
    # Create a train step counter if the caller did not provide one.
    if train_step_counter is None:
        train_step_counter = tf.Variable(0, trainable=False, dtype=tf.int64)

    # Build default networks when custom ones are not supplied.
    if actor_network is None:
        actor_network = create_sequential_actor_network(
            actor_fc_layers=actor_fc_layers,
            action_tensor_spec=action_spec,
        )

    if critic_network is None:
        critic_network = create_sequential_critic_network(
            obs_fc_layer_units=critic_obs_fc_layers,
            action_fc_layer_units=critic_action_fc_layers,
            joint_fc_layer_units=critic_joint_fc_layers,
        )

    # Named `agent` (not `tf_agent`) to avoid shadowing the imported
    # `tf_agents.agents.tf_agent` module used in the return annotation.
    agent = ddpg_agent.DdpgAgent(
        time_step_spec=time_step_spec,
        action_spec=action_spec,
        actor_network=actor_network,
        critic_network=critic_network,
        actor_optimizer=tf.keras.optimizers.Adam(
            learning_rate=actor_learning_rate
        ),
        critic_optimizer=tf.keras.optimizers.Adam(
            learning_rate=critic_learning_rate
        ),
        ou_stddev=ou_stddev,
        ou_damping=ou_damping,
        target_update_tau=target_update_tau,
        target_update_period=target_update_period,
        td_errors_loss_fn=tf.math.squared_difference,
        gamma=gamma,
        reward_scale_factor=reward_scale_factor,
        gradient_clipping=gradient_clipping,
        debug_summaries=debug_summaries,
        summarize_grads_and_vars=summarize_grads_and_vars,
        train_step_counter=train_step_counter,
    )

    # Initialize target networks and agent variables before returning.
    agent.initialize()

    return agent
Loading