diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..749ccda
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
diff --git a/RED/configs/example/Figure_3_RT3D_chemostat.yaml b/RED/configs/example/Figure_3_RT3D_chemostat.yaml
index a240d0b..ba18f04 100644
--- a/RED/configs/example/Figure_3_RT3D_chemostat.yaml
+++ b/RED/configs/example/Figure_3_RT3D_chemostat.yaml
@@ -4,6 +4,8 @@ defaults:
   - _self_
 
 policy_delay: 2
+wandb_team: rl-oed
+wandb_project_name: figure3-example
 initial_explore_rate: 1
 explore_rate_mul: 1
 test_episode: False
diff --git a/RED/configs/example/Figure_4_RT3D_chemostat.yaml b/RED/configs/example/Figure_4_RT3D_chemostat.yaml
index a240d0b..f20cda3 100644
--- a/RED/configs/example/Figure_4_RT3D_chemostat.yaml
+++ b/RED/configs/example/Figure_4_RT3D_chemostat.yaml
@@ -4,6 +4,9 @@ defaults:
   - _self_
 
 policy_delay: 2
+number_of_trials: 10
+wandb_team: rl-oed
+wandb_project_name: figure4-example
 initial_explore_rate: 1
 explore_rate_mul: 1
 test_episode: False
diff --git a/examples/Figure_3_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_RT3D_chemostat/train_RT3D.py
deleted file mode 100644
index a989c61..0000000
--- a/examples/Figure_3_RT3D_chemostat/train_RT3D.py
+++ /dev/null
@@ -1,230 +0,0 @@
-
-import json
-import math
-import os
-import sys
-
-IMPORT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-sys.path.append(IMPORT_PATH)
-
-import multiprocessing
-
-import hydra
-import matplotlib.pyplot as plt
-import numpy as np
-from casadi import *
-from hydra.utils import instantiate
-from omegaconf import DictConfig, OmegaConf
-
-from RED.agents.continuous_agents.rt3d import RT3D_agent
-from RED.environments.chemostat.xdot_chemostat import xdot
-from RED.environments.OED_env import OED_env
-from RED.utils.visualization import plot_returns
-
-# https://omegaconf.readthedocs.io/en/2.3_branch/how_to_guides.html#how-to-perform-arithmetic-using-eval-as-a-resolver
-OmegaConf.register_new_resolver("eval", eval)
-
-
-@hydra.main(version_base=None, config_path="../../RED/configs", config_name="example/Figure_3_RT3D_chemostat")
-def train_RT3D(cfg : DictConfig):
-    ### config setup
-    cfg = cfg.example
-    print(
-        "--- Configuration ---",
-        OmegaConf.to_yaml(cfg, resolve=True),
-        "--- End of configuration ---",
-        sep="\n\n"
-    )
-
-    ### prepare save path
-    os.makedirs(cfg.save_path, exist_ok=True)
-    print("Results will be saved in: ", cfg.save_path)
-
-    ### agent setup
-    agent = instantiate(cfg.model)
-    explore_rate = cfg.initial_explore_rate
-    seq_dim = cfg.environment.n_observed_variables + 1 + cfg.environment.n_controlled_inputs
-
-    ### env setup
-    env, n_params = setup_env(cfg)
-    total_episodes = cfg.environment.n_episodes // cfg.environment.n_parallel_experiments
-    skip_first_n_episodes = cfg.environment.skip_first_n_experiments // cfg.environment.n_parallel_experiments
-    starting_episode = 0
-
-    history = {k: [] for k in ["returns", "actions", "rewards", "us", "explore_rate", "update_count"]}
-
-    ### load ckpt
-    if cfg.load_ckpt_dir_path is not None:
-        print(f"Loading checkpoint from: {cfg.load_ckpt_dir_path}")
-        # load the agent
-        agent_path = os.path.join(cfg.load_ckpt_dir_path, "agent.pt")
-        print(f"Loading agent from: {agent_path}")
-        additional_info = agent.load_ckpt(
-            load_path=agent_path,
-            load_target_networks=True,
-        )["additional_info"]
-        # load history
-        history_path = os.path.join(cfg.load_ckpt_dir_path, "history.json")
-        if os.path.exists(history_path):
-            print(f"Loading history from: {history_path}")
-            with open(history_path, "r") as f:
-                history = json.load(f)
-            # load explore rate
-            if "explore_rate" in history and len(history["explore_rate"]) > 0:
-                explore_rate = history["explore_rate"][-1]
-        # load starting episode
-        if "episode" in additional_info:
-            starting_episode = additional_info["episode"] + 1
-
-    ### training loop
-    for episode in range(starting_episode, total_episodes):
-        actual_params = np.random.uniform(
-            low=cfg.environment.actual_params,
-            high=cfg.environment.actual_params,
-            size=(cfg.environment.n_parallel_experiments, n_params)
-        )
-        env.param_guesses = DM(actual_params)
-
-        ### episode buffers for agent
-        states = [env.get_initial_RL_state_parallel() for i in range(cfg.environment.n_parallel_experiments)]
-        trajectories = [[] for _ in range(cfg.environment.n_parallel_experiments)]
-        sequences = [[[0] * seq_dim] for _ in range(cfg.environment.n_parallel_experiments)]
-
-        ### episode logging buffers
-        e_returns = [0 for _ in range(cfg.environment.n_parallel_experiments)]
-        e_actions = []
-        e_rewards = [[] for _ in range(cfg.environment.n_parallel_experiments)]
-        e_us = [[] for _ in range(cfg.environment.n_parallel_experiments)]
-
-        ### reset env between episodes
-        env.reset()
-        env.param_guesses = DM(actual_params)
-        env.logdetFIMs = [[] for _ in range(cfg.environment.n_parallel_experiments)]
-        env.detFIMs = [[] for _ in range(cfg.environment.n_parallel_experiments)]
-
-        ### run an episode
-        for control_interval in range(0, cfg.environment.N_control_intervals):
-            inputs = [states, sequences]
-
-            ### get agent's actions
-            if episode < skip_first_n_episodes:
-                actions = agent.get_actions(inputs, explore_rate=1, test_episode=cfg.test_episode, recurrent=True)
-            else:
-                actions = agent.get_actions(inputs, explore_rate=explore_rate, test_episode=cfg.test_episode, recurrent=True)
-            e_actions.append(actions)
-
-            ### step env
-            outputs = env.map_parallel_step(actions.T, actual_params, continuous=True)
-            next_states = []
-            for i, obs in enumerate(outputs):
-                state, action = states[i], actions[i]
-                next_state, reward, done, _, u = obs
-
-                ### set done flag
-                if control_interval == cfg.environment.N_control_intervals - 1 \
-                    or np.all(np.abs(next_state) >= 1) \
-                    or math.isnan(np.sum(next_state)):
-                    done = True
-
-                ### memorize transition
-                transition = (state, action, reward, next_state, done)
-                trajectories[i].append(transition)
-                sequences[i].append(np.concatenate((state, action)))
-
-                ### log episode data
-                e_us[i].append(u.tolist())
-                next_states.append(next_state)
-                e_rewards[i].append(reward)
-                e_returns[i] += reward
-            states = next_states
-
-        ### do not memorize the test trajectory (the last one)
-        if cfg.test_episode:
-            trajectories = trajectories[:-1]
-
-        ### append trajectories to memory
-        for trajectory in trajectories:
-            # check for instability
-            if np.all([np.all(np.abs(trajectory[i][0]) <= 1) for i in range(len(trajectory))]) \
-                and not math.isnan(np.sum(trajectory[-1][0])):
-                agent.memory.append(trajectory)
-
-        ### train agent
-        if episode > skip_first_n_episodes:
-            for _ in range(cfg.environment.n_parallel_experiments):
-                history["update_count"].append(history["update_count"][-1] + 1 if len(history["update_count"]) > 0 else 1)
-                update_policy = history["update_count"][-1] % cfg.policy_delay == 0
-                agent.Q_update(policy=update_policy, recurrent=True)
-        else:
-            history["update_count"].append(history["update_count"][-1] if len(history["update_count"]) > 0 else 0)
-
-        ### update explore rate
-        explore_rate = cfg.explore_rate_mul * agent.get_rate(
-            episode=episode,
-            min_rate=0,
-            max_rate=1,
-            denominator=cfg.environment.n_episodes / (11 * cfg.environment.n_parallel_experiments)
-        )
-
-        ### log results
-        history["returns"].extend(e_returns)
-        history["actions"].extend(np.array(e_actions).transpose(1, 0, 2).tolist())
-        history["rewards"].extend(e_rewards)
-        history["us"].extend(e_us)
-        history["explore_rate"].append(explore_rate)
-
-        print(
-            f"\nEPISODE: [{episode}/{total_episodes}] ({episode * cfg.environment.n_parallel_experiments} experiments)",
-            f"explore rate:\t{explore_rate:.2f}",
-            f"average return:\t{np.mean(e_returns):.5f}",
-            sep="\n",
-        )
-
-        if cfg.test_episode:
-            print(
-                f"test actions:\n{np.array(e_actions)[:, -1]}",
-                f"test rewards:\n{np.array(e_rewards)[-1, :]}",
-                f"test return:\n{np.sum(np.array(e_rewards)[-1, :])}",
-                sep="\n",
-            )
-
-        ### checkpoint
-        if (cfg.ckpt_freq is not None and episode % cfg.ckpt_freq == 0) \
-            or episode == total_episodes - 1:
-            ckpt_dir = os.path.join(cfg.save_path, f"ckpt_{episode}")
-            os.makedirs(ckpt_dir, exist_ok=True)
-            agent.save_ckpt(
-                save_path=os.path.join(ckpt_dir, "agent.pt"),
-                additional_info={
-                    "episode": episode,
-                }
-            )
-            with open(os.path.join(ckpt_dir, "history.json"), "w") as f:
-                json.dump(history, f)
-
-    ### plot
-    plot_returns(
-        returns=history["returns"],
-        explore_rates=history["explore_rate"],
-        show=False,
-        save_to_dir=cfg.save_path,
-        conv_window=25,
-    )
-
-
-def setup_env(cfg):
-    n_cores = multiprocessing.cpu_count()
-    actual_params = DM(cfg.environment.actual_params)
-    normaliser = np.array(cfg.environment.normaliser)
-    n_params = actual_params.size()[0]
-    param_guesses = actual_params
-    args = cfg.environment.y0, xdot, param_guesses, actual_params, cfg.environment.n_observed_variables, \
-        cfg.environment.n_controlled_inputs, cfg.environment.num_inputs, cfg.environment.input_bounds, \
-        cfg.environment.dt, cfg.environment.control_interval_time, normaliser
-    env = OED_env(*args)
-    env.mapped_trajectory_solver = env.CI_solver.map(cfg.environment.n_parallel_experiments, "thread", n_cores)
-    return env, n_params
-
-
-if __name__ == '__main__':
-    train_RT3D()
diff --git a/examples/Figure_4_RT3D_chemostat/train_RT3D.py b/examples/Figure_3_and_4_RT3D_chemostat/train_RT3D.py
similarity index 87%
rename from examples/Figure_4_RT3D_chemostat/train_RT3D.py
rename to examples/Figure_3_and_4_RT3D_chemostat/train_RT3D.py
index 9cd423d..6680a66 100644
--- a/examples/Figure_4_RT3D_chemostat/train_RT3D.py
+++ b/examples/Figure_3_and_4_RT3D_chemostat/train_RT3D.py
@@ -1,14 +1,15 @@
-
 import json
 import math
 import os
 import sys
+from datetime import datetime
 
+# TODO: Can we just run using python -m?
 IMPORT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 sys.path.append(IMPORT_PATH)
 
 import multiprocessing
-
+import wandb
 import hydra
 import numpy as np
 from casadi import *
@@ -35,6 +36,30 @@ def train_RT3D(cfg : DictConfig):
         sep="\n\n"
     )
 
+    group_name = None
+    number_of_trials = 1
+    if hasattr(cfg, "number_of_trials"):
+        group_name = datetime.now().strftime("%d-%m-%Y_%H-%M")
+        number_of_trials = cfg.number_of_trials
+
+    original_save_path = cfg.save_path
+    for _ in range(number_of_trials):
+        # Append the datetime group name (if any) to differentiate trials
+        cfg.save_path = os.path.join(original_save_path, group_name) if group_name else original_save_path
+        run_single_experiment(cfg, group_name=group_name)
+
+
+
+def run_single_experiment(cfg : DictConfig, group_name : str):
+
+    # start a new wandb run to track this script
+    if group_name:
+        wandb.init(reinit=True, project=cfg.wandb_project_name,
+                   entity=cfg.wandb_team, group=group_name, config=dict(cfg))
+    else:
+        wandb.init(reinit=True, project=cfg.wandb_project_name,
+                   entity=cfg.wandb_team, config=dict(cfg))
+
     ### prepare save path
     os.makedirs(cfg.save_path, exist_ok=True)
     print("Results will be saved in: ", cfg.save_path)
@@ -81,7 +106,7 @@
         actual_params = np.random.uniform(
             low=cfg.environment.lb,
             high=cfg.environment.ub,
-            size=(cfg.environment.n_parallel_experiments, 3)
+            size=(cfg.environment.n_parallel_experiments, n_params)
         )
         env.param_guesses = DM(actual_params)
 
@@ -173,6 +198,11 @@
         history["us"].extend(e_us)
         history["explore_rate"].append(explore_rate)
 
+        ### log results to wandb
+        for i in range(len(e_returns)):
+            wandb.log({"returns": e_returns[i], "actions": np.array(e_actions).transpose(1, 0, 2)[i],
+                       "us": e_us[i], "explore_rate": explore_rate})
+
         print(
             f"\nEPISODE: [{episode}/{total_episodes}] ({episode * cfg.environment.n_parallel_experiments} experiments)",
             f"explore rate:\t{explore_rate:.2f}",
@@ -211,6 +241,8 @@
         conv_window=25,
     )
 
+    wandb.finish()
+
 
 def setup_env(cfg):
     n_cores = multiprocessing.cpu_count()
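For orientation, a minimal sketch of how the runs produced by the wandb logging added above could be pulled back for cross-trial analysis. It is illustrative rather than part of the patch: the entity ("rl-oed") and project ("figure4-example") mirror the example configs, the "returns" key matches the wandb.log call, and the group string is a hypothetical stand-in for whichever datetime group name train_RT3D.py generated.

import wandb

# Query the wandb public API for every run in one trial group
api = wandb.Api()
group_name = "01-01-2024_12-00"  # hypothetical group name written by train_RT3D.py
runs = api.runs("rl-oed/figure4-example", filters={"group": group_name})

for run in runs:
    # run.history() returns a DataFrame of everything passed to wandb.log
    returns = run.history(keys=["returns"])["returns"]
    print(run.name, float(returns.mean()))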