5 changes: 5 additions & 0 deletions .gitignore
@@ -8,3 +8,8 @@ rl_lib/tests/models/

#jupyter notebooks
*.ipynb


.vscode
requirements.txt
dist
43 changes: 29 additions & 14 deletions README.md
@@ -28,12 +28,13 @@ RL_Lib is a powerful and flexible tool for...
<li>DQN and its modifications</li>
<li>DRQN</li>
<li>DDPG</li>
<li>QR_DQN</li>
</ul>

## Basic usage
#### Creating an algorithm with the default config (the default config can be found in the algorithm's folder):
```
from rl_lib.src.algoritms.dqn.dqn import DQN
from rl_lib import DQN

config = {'model_config':{}}
config['model_config']['input_shape'] = env.observation_space.shape
@@ -42,25 +43,40 @@ config['model_config']['action_space'] = env.action_space.n
algo = DQN(config)
```
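
For reference, a minimal end-to-end sketch of this default-config path, assuming a Gymnasium-style `CartPole` environment (the environment and the interaction lines are illustrative; the actual training loops live in the `examples/` scripts):

```
import gym  # gym >= 0.26 / gymnasium-style API, as in the example scripts

from rl_lib import DQN

env = gym.make('CartPole-v0')

# Only the observation shape and the number of discrete actions are required
# on top of the algorithm's default config.
config = {'model_config': {}}
config['model_config']['input_shape'] = env.observation_space.shape
config['model_config']['action_space'] = env.action_space.n

algo = DQN(config)

# Action selection as used in examples/dqn/cart_pole/dqn_cart_pole.py.
observation, info = env.reset()
action = algo.get_action(observation)
new_observation, reward, done, _, info = env.step(action)
```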

#### Creating a custom algorithm:
#### Loading a custom algorithm configuration:
```
from rl_lib.src.algoritms.dqn.dqn import DQN
from yaml import safe_load
from rl_lib import DQN
from rl_lib import load_default_config

path = # path to the configuration file
path = # path to the configuration file; must end with .yaml

config = safe_load(
    open(
        os_path.join(
            os_path.dirname(path), "./config.yaml"
        ),
        "rb")
    )
config = load_default_config(path)
config['model_config']['input_shape'] = env.observation_space.shape
config['model_config']['action_space'] = env.action_space.n

algo = DQN(config)
```
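
The example scripts in this PR extend the same pattern by injecting a custom Keras model into `model_config` before constructing the algorithm. A sketch of that pattern, with an illustrative network (the architecture below is an assumption, not the repository's actual model):

```
import tensorflow as tf
from tensorflow.keras import layers

from rl_lib import DQN, load_default_config

def create_model(input_shape, action_space):
    # Illustrative feed-forward Q-network; each example defines its own architecture.
    input_layer = layers.Input(shape=input_shape)
    x = layers.Dense(64, activation='relu')(input_layer)
    x = layers.Dense(64, activation='relu')(x)
    output = layers.Dense(action_space, activation=None)(x)
    return tf.keras.Model(inputs=input_layer, outputs=output)

config = load_default_config(path)  # path to the .yaml config, as above
config['model_config']['input_shape'] = env.observation_space.shape
config['model_config']['action_space'] = env.action_space.n
config['model_config']['model'] = create_model(env.observation_space.shape,
                                               env.action_space.n)

algo = DQN(config)
```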

#### High-level API for training an algorithm:
```
from rl_lib import DQN
from rl_lib import load_default_config
from rl_lib import Base_Env_Runner

path = # path to the configuration file; must end with .yaml

config = load_default_config(path)
config['model_config']['input_shape'] = env.observation_space.shape
config['model_config']['action_space'] = env.action_space.n
algo = DQN(config)

runner = Base_Env_Runner(env=env,
                         algo=algo,
                         ...)

runner.run()
```

## Main algorithm methods
#### Saving and loading a saved algorithm:
```
...
```

@@ -110,14 +126,13 @@ algo.initial_state()
<ul type="disk">
<li>Implementation of algorithms:
<ul>
<li>QR-DQN</li>
<li>IQN</li>
<li>A2C</li>
<li>TD3</li>
<li>Ape-X</li>
<li>RD2D</li>
<li>Bandits</li>
</ul>
<li>Adding LaziFrames to the save buffers</li>
<li>Adding LazyFrames to the save buffers</li>
<li>Writing a wrapper for the training steps in the environment</li>
<li>Implementing training statistics logging</li>
19 changes: 12 additions & 7 deletions examples/ddpg/car_racing/config.yaml
@@ -6,26 +6,26 @@ model_config:
action_space: None
discount_factor : 0.99
n_step: 1
batch_size: 32
batch_size: 16
double_network: False
priority: False


actor_model_config:
model_config:
model: None
tau: 0.01
tau: 0.001

critic_model_config:
model_config:
model: None
tau: 0.01
tau: 0.001

actor_optimizer_config:
optimizer_config:
optimizer_name: "adam"
optimizer_params:
learning_rate: 0.001
learning_rate: 0.0001
epsilon: 0.001
clipnorm: 1.0
custom_optimizer: None
@@ -34,7 +34,7 @@ critic_optimizer_config:
optimizer_config:
optimizer_name: "adam"
optimizer_params:
learning_rate: 0.002
learning_rate: 0.001
epsilon: 0.001
clipnorm: 1.0
custom_optimizer: None
@@ -54,9 +54,14 @@ buffer_config:
exploration_config:
strategy_name: "ou_noise"
strategy_config:
alpha: 0.5
sigma: 1.0
alpha: 0.0
sigma: 0.2
action_space: None
upper_bound: None
lower_bound: None
dt: 0.01
mean: None
theta: 0.15

data_saver:
path: ""
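
The retuned exploration defaults (theta 0.15, sigma 0.2, dt 0.01, zero mean) are the classic Ornstein-Uhlenbeck parameters. For readers unfamiliar with the `ou_noise` strategy, a self-contained reference implementation of that process (this is not rl_lib's internal class, whose code is outside this diff):

```
import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process:
    x_{t+dt} = x_t + theta * (mean - x_t) * dt + sigma * sqrt(dt) * N(0, 1)."""

    def __init__(self, action_dim, mean=0.0, theta=0.15, sigma=0.2, dt=0.01):
        self.mean = np.full(action_dim, mean)
        self.theta, self.sigma, self.dt = theta, sigma, dt
        self.reset()

    def reset(self):
        # Start each episode from the long-run mean.
        self.x = self.mean.copy()

    def sample(self):
        self.x = (self.x
                  + self.theta * (self.mean - self.x) * self.dt
                  + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.x.shape))
        return self.x

# Typical DDPG usage (bounds come from the environment's action space):
# noise = OUNoise(action_dim=env.action_space.shape[0])
# action = np.clip(actor_prediction + noise.sample(), lower_bound, upper_bound)
```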
5 changes: 3 additions & 2 deletions examples/ddpg/car_racing/ddpg_car_racing.py
@@ -7,7 +7,7 @@
from pprint import pprint
import traceback

from rl_lib.src.algoritms.ddpg.ddpg import DDPG
from rl_lib import DDPG
from rl_lib.src.data_saver.utils import load_default_config

env = gym.make('CarRacing-v2')
@@ -42,7 +42,7 @@ def create_critic_model():
flatten = layers.Flatten()(concat)
dence_layer1 = layers.Dense(256, activation='relu')(flatten)
dence_layer2 = layers.Dense(256, activation='relu')(dence_layer1)
dence_out = layers.Dense(env.action_space.shape[0], activation=None)(dence_layer2)
dence_out = layers.Dense(1, activation=None)(dence_layer2)

return tf.keras.Model(inputs=[input_layer, input_action_layer], outputs=dence_out)

@@ -81,6 +81,7 @@ def run(algo):

observation, info = env.reset()
episode_reward = 0
episode_loss = []
for step in range(1, steps+1):
action = algo.get_action(observation)
new_observation, reward, done, _, info = env.step(action)
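
The critic change above is the standard DDPG fix: the critic estimates a single scalar Q(s, a), so its head is `Dense(1)` rather than one unit per action dimension. The actor, whose definition is collapsed in this diff, is typically the mirror image; a purely illustrative sketch (not the file's actual code):

```
import tensorflow as tf
from tensorflow.keras import layers

def create_actor_model(input_shape, action_dim, upper_bound=1.0):
    # Hypothetical DDPG actor: observation -> bounded continuous action.
    input_layer = layers.Input(shape=input_shape)
    x = layers.Flatten()(input_layer)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dense(256, activation='relu')(x)
    # tanh keeps each action component in [-1, 1]; scale to the env's bounds.
    actions = layers.Dense(action_dim, activation='tanh')(x)
    scaled_actions = actions * upper_bound
    return tf.keras.Model(inputs=input_layer, outputs=scaled_actions)
```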
3 changes: 2 additions & 1 deletion examples/dqn/cart_pole/dqn_cart_pole.py
@@ -6,7 +6,7 @@
import tensorflow as tf
from pprint import pprint

from rl_lib.src.algoritms.dqn.dqn import DQN
from rl_lib import DQN
from rl_lib.src.data_saver.utils import load_default_config

env = gym.make('CartPole-v0')
@@ -49,6 +49,7 @@ def run(algo):

observation, info = env.reset()
episode_reward = 0
episode_loss = []
for step in range(1, steps):
action = algo.get_action(observation)
new_observation, reward, done, _, info = env.step(action)
2 changes: 1 addition & 1 deletion examples/drqn/cart_pole/config.yaml
@@ -1,4 +1,4 @@
#default DQN config
#default DRQN config

model_config:
model: None
5 changes: 3 additions & 2 deletions examples/drqn/cart_pole/drqn_cart_pole.py
@@ -6,7 +6,7 @@
import tensorflow as tf
from pprint import pprint

from rl_lib.src.algoritms.drqn.drqn import DRQN
from rl_lib import DRQN
from rl_lib.src.data_saver.utils import load_default_config

env = gym.make('CartPole-v0')
@@ -26,7 +26,7 @@ def create_model(lstm_size = 32):

return tf.keras.Model(inputs=[input_layer, h_t_input, c_t_input], outputs=[dence_out, lstm[1], lstm[2]])

config = load_default_config("..\\rl_lib\\rl_lib\\examples\\drqn\\cart_pole/")
config = load_default_config(__file__)
config['model_config']['model'] = create_model(lstm_size=config['model_config']['lstm_size'])
config['model_config']['input_shape'] = env.observation_space.shape
config['model_config']['action_space'] = env.action_space.n
@@ -56,6 +56,7 @@ def run(algo):
observation, info = env.reset()
algo.initial_state()
episode_reward = 0
episode_loss = []
for step in range(1, steps):
action = algo.get_action(observation)
new_observation, reward, done, _, info = env.step(action)
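
Two things change in this example: the config is now resolved relative to the script via `load_default_config(__file__)` instead of a hard-coded Windows path, and the model keeps explicit LSTM hidden/cell state inputs. Only the return statement of `create_model` is visible in the diff; a hedged reconstruction of what such a builder can look like (the layer sizes and the length-1 sequence trick are assumptions):

```
import tensorflow as tf
from tensorflow.keras import layers

def create_model(lstm_size=32, input_shape=(4,), action_space=2):
    # Observation plus the previous recurrent state as explicit model inputs.
    input_layer = layers.Input(shape=input_shape)
    h_t_input = layers.Input(shape=(lstm_size,))
    c_t_input = layers.Input(shape=(lstm_size,))

    # Treat each observation as a length-1 sequence so the LSTM is stepped one frame at a time.
    seq = layers.Reshape((1, input_shape[0]))(input_layer)
    lstm = layers.LSTM(lstm_size, return_state=True)(seq,
                                                     initial_state=[h_t_input, c_t_input])

    dence_out = layers.Dense(action_space, activation=None)(lstm[0])

    # Matches the return signature visible in the diff: Q-values plus the new (h, c) state,
    # which algo.initial_state() resets at the start of every episode.
    return tf.keras.Model(inputs=[input_layer, h_t_input, c_t_input],
                          outputs=[dence_out, lstm[1], lstm[2]])
```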
3 changes: 3 additions & 0 deletions rl_lib/__init__.py
@@ -0,0 +1,3 @@
from rl_lib.src.algoritms import DDPG, DQN, DRQN, QR_DQN
from rl_lib.src.data_saver.utils import load_default_config
from rl_lib.src.runners.base_runner import Base_Env_Runner
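
With this new package-level `__init__.py`, the names used throughout the updated examples resolve directly from the package root, for example:

```
from rl_lib import DDPG, DQN, DRQN, QR_DQN   # algorithms
from rl_lib import load_default_config       # yaml config loader
from rl_lib import Base_Env_Runner           # high-level training runner
```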
3 changes: 2 additions & 1 deletion rl_lib/src/algoritms/__init__.py
@@ -1 +1,2 @@

from .model_free.continuous_control import DDPG
from .model_free.value_based import DQN, DRQN, QR_DQN
146 changes: 0 additions & 146 deletions rl_lib/src/algoritms/a2c/actor_critic.py

This file was deleted.
