Changes from all commits (106 commits)
e4cfa1d
Added visualization for validation
Midren Oct 17, 2022
b2d2769
working dqn
Midren Oct 17, 2022
3a358e6
Added VAE implementation
Midren Oct 26, 2022
aeb052d
Added dm_control integration
Midren Nov 2, 2022
21c179f
Added VQ-VAE, tested on CIFAR-10
Midren Nov 4, 2022
0fee658
Added clusterized sampling, and sampling of observations instead of s…
Midren Nov 5, 2022
7907e33
Written skeleton for Dreamer and important future notes
Midren Nov 8, 2022
a820328
Implemented World Model mainloop and loss calculation
Midren Nov 8, 2022
ae20fcb
Added generation of imaginative rollouts
Midren Nov 10, 2022
4c3f249
Implemented ActorCritic training in Latent Space
Midren Nov 10, 2022
a8089b9
Added Env abstraction for different env support, and action transform
Midren Nov 14, 2022
cc038c1
Rewritten replay buffer to be more memory efficient and fixed cluster sa…
Midren Nov 15, 2022
2266ea5
Small improvements & fixes
Midren Nov 15, 2022
24923e9
Added dist probs & planning visualization
Midren Nov 15, 2022
b5ed8b1
Performance improvement
Midren Dec 4, 2022
769e308
Fixes
Midren Dec 5, 2022
5bbc60c
Fixes
Midren Dec 6, 2022
03a0ee7
Various fixes and refactorings
Midren Dec 6, 2022
5932594
Fix reconstruction & improve hist viz
Midren Dec 6, 2022
6bfb34b
Add PersistentReplayBuffer based on WebDataset
Midren Dec 7, 2022
c88cd04
add async iteration
Midren Dec 9, 2022
91e96c4
Improved the performance of training
Midren Dec 18, 2022
3c46b05
Added mock env, which just iterates over different colors
Midren Jan 7, 2023
90bb2a4
Fixed clustering of samples and correct input for world-model components
Midren Jan 6, 2023
faf35b9
Fix training for reconstruction
Midren Jan 13, 2023
224fd52
Fixed discount factor (invert is_finished), add lamda_return tests
Midren Jan 14, 2023
ce91566
Changed discount factor to enforce 0 after first 0, fixed edge cases …
Midren Jan 14, 2023
c4809d7
Minor fixes, which don't fix anything
Midren Jan 14, 2023
eacce41
Added metrics for AC
Midren Jan 14, 2023
6ac194f
WIP
Midren Jan 19, 2023
3c4d94e
Using Tanh/Trunc distributions from torchrl, fixed number of layers i…
Midren Jan 19, 2023
9a43e7f
It is working! (confirmed for cheetah and walker)
Midren Jan 27, 2023
b12adad
Changed income data to account for 1 frame, fixed rewards/discount_fa…
Midren Jan 27, 2023
42b448b
Changed video logging (5 steps to update determ state, show error)
Midren Jan 28, 2023
9d3d00b
Added state abstraction which simplified a lot !
Midren Jan 28, 2023
0dabd54
Fixed video log
Midren Jan 28, 2023
d8ba8c0
Fixes
Midren Jan 29, 2023
1164ba6
Fixed differences with original DreamerV2
Midren Feb 1, 2023
a6a4e44
Fixed critical bug with nullified action in training
Midren Feb 3, 2023
b689a22
added reloading from checkpoint
Midren Feb 17, 2023
0958015
Added VQ-VAE as a step after each RNN step
Midren Feb 18, 2023
7ea44bf
Added parameter for disabling discrete rssm
Midren Feb 18, 2023
d534717
Added Crafter environment
Midren Feb 18, 2023
74c1a3c
Added logging of gradients
Midren Feb 19, 2023
07ee10f
Added missing Dreamer parts, required for Crafter
Midren Feb 20, 2023
b21467a
Added scheduler
Midren Feb 21, 2023
abb8a20
Fixed loss in quantizer
Midren Mar 4, 2023
383ebfa
Fixes
Midren Mar 9, 2023
6366540
Small fixes
Midren Mar 23, 2023
3b9906f
Added reconstruction of DINO features
Midren Mar 23, 2023
0d3a6b0
tuned parameters for dino features, added convolutional vit decoder
Midren Apr 4, 2023
2d2b658
Added upsample before ViT
Midren Apr 4, 2023
0a5ef3a
Added 16 patch size with 224 px variant
Midren Apr 5, 2023
3941234
Fixes to work with torch.compile
Midren Apr 7, 2023
f2c0906
Working slot attention implementation, with 224px resize
Midren Apr 7, 2023
9d2839f
Added DINO features reconstruction for slot attention
Midren Apr 11, 2023
f060b5a
64px encoder, add possibility to use prev_slots
Midren Apr 11, 2023
5034573
Added first implementation of slot attention inside Dreamer
Midren Apr 15, 2023
d99db5f
Renamed
Midren Apr 15, 2023
ad411de
Added dockerfiles with installation guide
Midren Apr 23, 2023
7d2f6b1
Slot attention debug
Midren Apr 23, 2023
e5b48a2
Added parameter sweep configuration to run on each of gpu
Midren Apr 23, 2023
e4b2ec4
Added bigger encoder
Midren Apr 24, 2023
1f66d01
parameters without decoder collapse
Midren Apr 25, 2023
ff83255
Refactored to have separate entities in hydra
Midren Apr 26, 2023
01db417
Refactored to have simultaneously multiple version of world models (d…
Midren Apr 27, 2023
6b1f4b5
Get slotted crafter working with DINO features
Midren Jun 3, 2023
a4d0dad
Added per-slot statistics and all slot dynamics learning
Midren Jun 8, 2023
370cfba
Added infra to precalc observation data inside replay buffer
Midren Jun 20, 2023
26f83a7
Added scheduling for attention
Midren Jun 21, 2023
f8846b6
Added hard/soft mixing and per slot mse mean loss
Midren Jun 21, 2023
c181149
Added corresponding configs
Midren Jun 24, 2023
cc362ae
Rewritten hard mixing and simplified per slot reconstruction loss
Midren Jun 24, 2023
5fcbc56
Performance improvements
Midren Jul 3, 2023
42f04c9
Fixed that layer normalization was not applying
Midren Jul 3, 2023
ef28130
Moved the reward transformation inside dreamer for correct tensorboar…
Midren Jul 3, 2023
60fa7c4
Added wandb integration
Midren Jul 4, 2023
4cac3de
Added Atari support and fixed encode_vit for combined
Midren Jul 9, 2023
bdda685
Added option for choosing 224 vs 64 dino features
Midren Jul 12, 2023
54a31e6
Redo dino decoder
Midren Jul 13, 2023
9d169ba
Changed prev_slots to use same random vector
Midren Jul 15, 2023
c3ececa
Rewritten slot attention to calculate out of loop
Midren Jul 15, 2023
4cfaed3
Fix KL div calculation
Midren Jul 16, 2023
a7ff93b
Added pos encoding for combined slot dreamer
Midren Jul 16, 2023
e955bc7
fixup! Added pos encoding for combined slot dreamer
Midren Jul 16, 2023
f5d6413
Changed pos enc to sin-based
Midren Jul 17, 2023
9849dca
Fixed attention RSSM, added updated determ for more stable learning
Midren Jul 17, 2023
b87c194
Fix determ updated
Midren Jul 18, 2023
0e610b2
Added attention block before image update
Midren Jul 18, 2023
e80af48
Added identity decay for embed attention
Midren Jul 18, 2023
9bf4054
Added vit error visualization
Midren Jul 18, 2023
8c8e172
fixup! Added vit error visualization
Midren Jul 18, 2023
bdd8402
Higher kl loss
Midren Jul 19, 2023
276a872
Increase kl loss and batch size
Midren Jul 20, 2023
706e973
Added score calculation for crafter
Midren Jul 21, 2023
70dcd58
fixing differences with baselines
Midren Jul 23, 2023
aaa5dc4
Changes to the decoder
Midren Jul 25, 2023
4e437f2
Try newer dino-only attention slotted
Midren Jul 25, 2023
5829304
Train with f16, improve slot attention
Midren Aug 1, 2023
384b253
Pytorch 2.0 support, Spatial Broadcast Decoder and fixes
Midren Aug 11, 2023
86f0ba2
Added slot implementation after the dynamics model
Midren Aug 11, 2023
8369018
Added per slot rec loss
Midren Aug 13, 2023
3869755
Fixed per slot rec loss
Midren Aug 13, 2023
ac5998d
Totally fixed per slot rec for post slot
Midren Aug 13, 2023
72449dc
Add config option to tweak spatial decoder
Midren Aug 18, 2023
edad471
No image predictor in dino only, new envs
Midren Sep 2, 2023
3 changes: 3 additions & 0 deletions .gitignore
@@ -1 +1,4 @@
__pycache__/
.vscode/
runs/
poetry.lock
9 changes: 8 additions & 1 deletion .vimspector.json
@@ -39,7 +39,14 @@
"Run main": {
"extends": "python-base",
"configuration": {
"program": "main.py",
"program": "rl_sandbox/train.py",
"args": ["logger.type='tensorboard'", "training.prefill=0", "training.batch_size=4"]
}
},
"Run dino": {
"extends": "python-base",
"configuration": {
"program": "rl_sandbox/vision/slot_attention.py",
"args": []
}
}
61 changes: 61 additions & 0 deletions Dockerfile
@@ -0,0 +1,61 @@
ARG BASE_IMAGE=nvidia/cudagl:11.3.0-devel
FROM $BASE_IMAGE

ARG USER_ID
ARG GROUP_ID
ARG USER_NAME=user

RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y ssh gcc g++ gdb clang rsync tar python sudo git ffmpeg ninja-build locales \
&& apt-get clean \
&& sudo rm -rf /var/lib/apt/lists/*

RUN ( \
echo 'LogLevel DEBUG2'; \
echo 'PermitRootLogin yes'; \
echo 'PasswordAuthentication yes'; \
echo 'Subsystem sftp /usr/lib/openssh/sftp-server'; \
) > /etc/ssh/sshd_config_test_clion \
&& mkdir /run/sshd

RUN groupadd -g ${GROUP_ID} ${USER_NAME} && \
useradd -u ${USER_ID} -g ${GROUP_ID} -s /bin/bash -m ${USER_NAME} && \
yes password | passwd ${USER_NAME} && \
usermod -aG sudo ${USER_NAME} && \
echo "${USER_NAME} ALL=(ALL) NOPASSWD:ALL" | sudo tee /etc/sudoers.d/user && \
chmod 440 /etc/sudoers

USER ${USER_NAME}

RUN git clone https://github.com/Midren/dotfiles /home/${USER_NAME}/.dotfiles && \
/home/${USER_NAME}/.dotfiles/install-profile ubuntu-cli

RUN git config --global user.email "milromchuk@gmail.com" && \
git config --global user.name "Roman Milishchuk"

USER root

RUN apt-get update \
&& apt-get install -y software-properties-common curl \
&& add-apt-repository -y ppa:deadsnakes/ppa \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y python3.10 python3.10-dev python3.10-venv \
&& curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 \
&& apt-get clean \
&& sudo rm -rf /var/lib/apt/lists/*

RUN sudo update-alternatives --install /usr/bin/python3 python /usr/bin/python3.10 1 \
&& sudo update-alternatives --install /usr/bin/python python3 /usr/bin/python3.10 1

USER ${USER_NAME}
WORKDIR /home/${USER_NAME}/

RUN mkdir /home/${USER_NAME}/rl_sandbox

COPY pyproject.toml /home/${USER_NAME}/rl_sandbox/pyproject.toml
COPY rl_sandbox /home/${USER_NAME}/rl_sandbox/rl_sandbox

RUN cd /home/${USER_NAME}/rl_sandbox \
&& python3.10 -m pip install --no-cache-dir -e . \
&& rm -Rf /home/${USER_NAME}/.cache/pip


23 changes: 23 additions & 0 deletions README.md
@@ -0,0 +1,23 @@
## RL sandbox

## Run

Build docker:
```sh
docker build --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) --build-arg USER_NAME=$USER -t dreamer .
```

Run docker with tty:
```sh
docker run --gpus 'all' -it --rm -v `pwd`:/home/$USER/rl_sandbox -w /home/$USER/rl_sandbox dreamer zsh
```

Run training inside docker on gpu 0:
```sh
docker run --gpus 'device=0' -it --rm -v `pwd`:/home/$USER/rl_sandbox -w /home/$USER/rl_sandbox dreamer python3 rl_sandbox/train.py --config-name config_dino
```

To run the Dreamer version with slot attention, use:
```sh
rl_sandbox/train.py --config-name config_slotted
```
4 changes: 0 additions & 4 deletions config/agent/dqn_agent.yaml

This file was deleted.

12 changes: 0 additions & 12 deletions config/config.yaml

This file was deleted.

49 changes: 0 additions & 49 deletions main.py

This file was deleted.

31 changes: 30 additions & 1 deletion pyproject.toml
@@ -8,9 +8,38 @@ version = "0.1.0"
description = 'Sandbox for my RL experiments'
authors = ['Roman Milishchuk <milishchuk.roman@gmail.com>']
packages = [{include = 'rl_sandbox'}]
# add config directory as package data

# TODO: add yapf and isort as development dependencies
[tool.poetry.dependencies]
python = "^3.10"
numpy = '*'
nptyping = '*'
gym = "^0.26.1"
gym = "0.25.0" # crafter requires old step api
pygame = '*'
moviepy = '*'
torchvision = '*'
torch = '^2.0'
tensorboard = '^2.0'
dm-control = '^1.0.0'
unpackable = '^0.0.4'
hydra-core = "^1.2.0"
matplotlib = "^3.0.0"
webdataset = "^0.2.20"
jaxtyping = '^0.2.0'
lovely_tensors = '^0.1.10'
torchshow = '^0.5.0'
crafter = '^1.8.0'
wandb = '*'
flatten-dict = '*'
hydra-joblib-launcher = "*"

[tool.yapf]
based_on_style = "pep8"
column_limit = 90

[tool.pytest.ini_options]
addopts = [
"--import-mode=importlib",
]

2 changes: 2 additions & 0 deletions rl_sandbox/agents/__init__.py
@@ -0,0 +1,2 @@
from rl_sandbox.agents.dqn import DqnAgent
from rl_sandbox.agents.dreamer_v2 import DreamerV2
31 changes: 18 additions & 13 deletions rl_sandbox/agents/dqn_agent.py → rl_sandbox/agents/dqn.py
@@ -4,44 +4,49 @@
from rl_sandbox.agents.rl_agent import RlAgent
from rl_sandbox.utils.fc_nn import fc_nn_generator
from rl_sandbox.utils.replay_buffer import (Action, Actions, Rewards, State,
States, TerminationFlag)
States, TerminationFlags)


class DqnAgent(RlAgent):
def __init__(self, actions_num: int,
obs_space_num: int,
hidden_layer_size: int,
num_layers: int,
discount_factor: float):
discount_factor: float,
device_type: str = 'cpu'):
self.gamma = discount_factor
self.value_func = fc_nn_generator(obs_space_num,
actions_num,
hidden_layer_size,
num_layers)
num_layers,
torch.nn.ReLU).to(device_type)
self.optimizer = torch.optim.Adam(self.value_func.parameters(), lr=1e-3)
self.loss = torch.nn.MSELoss()
self.device_type = device_type

def get_action(self, obs: State) -> Action:
return np.array(torch.argmax(self.value_func(torch.from_numpy(obs)), dim=1))
return np.array(torch.argmax(self.value_func(torch.from_numpy(obs.reshape(1, -1)).to(self.device_type)), dim=1).detach().cpu())[0]

def train(self, s: States, a: Actions, r: Rewards, next: States, is_finished: TerminationFlag):
def train(self, s: States, a: Actions, r: Rewards, next: States, is_finished: TerminationFlags):
# Bellman error: MSE( (r + gamma * max_a Q(S_t+1, a)) - Q(s_t, a) )
# check for is finished

s = torch.from_numpy(s)
a = torch.from_numpy(a)
r = torch.from_numpy(r)
next = torch.from_numpy(next)
is_finished = torch.from_numpy(is_finished)
s = torch.from_numpy(s).to(self.device_type)
a = torch.from_numpy(a).to(self.device_type)
r = torch.from_numpy(r).to(self.device_type)
next = torch.from_numpy(next).to(self.device_type)
is_finished = torch.from_numpy(is_finished).to(self.device_type)

# TODO: normalize input
# TODO: double dqn with target network
values = self.value_func(next)
indeces = torch.argmax(values, dim=1)
x = r + (self.gamma * torch.gather(values, dim=1, index=indeces.unsqueeze(1)).squeeze(1)) * torch.logical_not(is_finished)
target = r + (self.gamma * torch.gather(values, dim=1, index=indeces.unsqueeze(1)).squeeze(1)) * torch.logical_not(is_finished)

loss = self.loss(x, torch.gather(self.value_func(s), dim=1, index=a).squeeze(1))
loss = self.loss(torch.gather(self.value_func(s), dim=1, index=a).squeeze(1), target.detach())

self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()

return loss.detach()
return {'loss': loss.detach().cpu()}
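
For reference, a minimal usage sketch of the updated `DqnAgent` interface (not part of this PR): only the constructor and method signatures are taken from the diff above; the CartPole environment, layer sizes, and single-transition training loop are illustrative assumptions — a real run would use a replay buffer.

```python
# Sketch only: exercises the DqnAgent API shown in the diff above.
# Environment choice and hyperparameters are hypothetical.
import gym
import numpy as np

from rl_sandbox.agents.dqn import DqnAgent

env = gym.make("CartPole-v1")
agent = DqnAgent(actions_num=env.action_space.n,
                 obs_space_num=env.observation_space.shape[0],
                 hidden_layer_size=64,
                 num_layers=2,
                 discount_factor=0.99,
                 device_type='cpu')

obs = env.reset()
for _ in range(1000):
    action = agent.get_action(obs)
    # gym is pinned to 0.25.0, so step() returns the old 4-tuple API
    next_obs, reward, done, info = env.step(int(action))
    # train() expects batched numpy arrays, matching the signature in the diff
    metrics = agent.train(obs[None].astype(np.float32),
                          np.array([[action]], dtype=np.int64),
                          np.array([reward], dtype=np.float32),
                          next_obs[None].astype(np.float32),
                          np.array([done]))
    obs = env.reset() if done else next_obs
```

Note that `train()` now returns a metrics dict (`metrics['loss']`) instead of a bare tensor, and all tensors are moved to `device_type` internally.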
1 change: 1 addition & 0 deletions rl_sandbox/agents/dreamer/__init__.py
@@ -0,0 +1 @@
from .common import *