From 5d560e6bc8ab0c0ab1cf645ec1d9699d0decd6ba Mon Sep 17 00:00:00 2001
From: Costa Huang <costa.huang@outlook.com>
Date: Wed, 16 Nov 2022 17:34:52 -0500
Subject: [PATCH 01/10] Support wall observation

---
 experiments/league.py            |  15 +-
 experiments/ppo_gridnet.py       |  39 ++-
 experiments/ppo_gridnet_eval.py  |  13 +-
 experiments/ppo_gridnet_large.py | 559 -------------------------------
 gym_microrts/envs/vec_env.py     |  22 +-
 gym_microrts/microrts            |   2 +-
 hello_world.py                   |   2 +-
 tests/test_observation.py        |  49 ++-
 8 files changed, 90 insertions(+), 611 deletions(-)
 delete mode 100644 experiments/ppo_gridnet_large.py

diff --git a/experiments/league.py b/experiments/league.py
index 5e79f8b2..4c64ed75 100644
--- a/experiments/league.py
+++ b/experiments/league.py
@@ -58,7 +58,7 @@ def parse_args():
         help='the highest sigma of the trueskill evaluation')
     parser.add_argument('--output-path', type=str, default=f"league.temp.csv",
         help='the output path of the leaderboard csv')
-    parser.add_argument('--model-type', type=str, default=f"ppo_gridnet_large", choices=["ppo_gridnet_large", "ppo_gridnet"],
+    parser.add_argument('--model-type', type=str, default=f"ppo_gridnet", choices=["ppo_gridnet"],
         help='the output path of the leaderboard csv')
     parser.add_argument('--maps', nargs='+', default=["maps/16x16/basesWorkers16x16A.xml"],
         help="the maps to do trueskill evaluations")
@@ -83,18 +83,13 @@ def parse_args():
     dbpath = tmp_dbpath
 db = SqliteDatabase(dbpath)
 
-if args.model_type == "ppo_gridnet_large":
-    from ppo_gridnet_large import Agent, MicroRTSStatsRecorder
-
-    from gym_microrts.envs.vec_env import MicroRTSBotVecEnv, MicroRTSGridModeVecEnv
-else:
+if args.model_type == "ppo_gridnet":
     from ppo_gridnet import Agent, MicroRTSStatsRecorder
-
     from gym_microrts.envs.vec_env import MicroRTSBotVecEnv
-    from gym_microrts.envs.vec_env import (
-        MicroRTSGridModeSharedMemVecEnv as MicroRTSGridModeVecEnv,
-    )
+    from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv
 
+else:
+    raise ValueError(f"model_type {args.model_type} is not supported")
 
 class BaseModel(Model):
     class Meta:
diff --git a/experiments/ppo_gridnet.py b/experiments/ppo_gridnet.py
index a5be7bdd..befb0e7e 100644
--- a/experiments/ppo_gridnet.py
+++ b/experiments/ppo_gridnet.py
@@ -19,9 +19,7 @@
 from torch.utils.tensorboard import SummaryWriter
 
 from gym_microrts import microrts_ai
-from gym_microrts.envs.vec_env import (
-    MicroRTSGridModeSharedMemVecEnv as MicroRTSGridModeVecEnv,
-)
+from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv
 
 
 def parse_args():
@@ -410,6 +408,9 @@ def on_evaluation_done(self, future):
     )
 
     for update in range(starting_update, args.num_updates + 1):
+        step_time = 0
+        inference_time = 0
+        get_mask_time = 0
         # Annealing the rate if instructed to do so.
         if args.anneal_lr:
             frac = 1.0 - (update - 1.0) / args.num_updates
@@ -417,27 +418,34 @@ def on_evaluation_done(self, future):
             optimizer.param_groups[0]["lr"] = lrnow
 
         # TRY NOT TO MODIFY: prepare the execution of the game.
+        rollout_time_start = time.time()
         for step in range(0, args.num_steps):
             # envs.render()
             global_step += 1 * args.num_envs
             obs[step] = next_obs
             dones[step] = next_done
+
+            get_mask_time_start = time.time()
+            invalid_action_masks[step] = torch.tensor(envs.get_action_mask()).to(device)
+            get_mask_time += time.time() - get_mask_time_start
+
             # ALGO LOGIC: put action logic here
+            inference_time_start = time.time()
             with torch.no_grad():
-                invalid_action_masks[step] = torch.tensor(envs.get_action_mask()).to(device)
                 action, logproba, _, _, vs = agent.get_action_and_value(
                     next_obs, envs=envs, invalid_action_masks=invalid_action_masks[step], device=device
                 )
                 values[step] = vs.flatten()
-
             actions[step] = action
             logprobs[step] = logproba
-            try:
-                next_obs, rs, ds, infos = envs.step(action.cpu().numpy().reshape(envs.num_envs, -1))
-                next_obs = torch.Tensor(next_obs).to(device)
-            except Exception as e:
-                e.printStackTrace()
-                raise
+            cpu_action = action.cpu().numpy().reshape(envs.num_envs, -1)
+            inference_time += time.time() - inference_time_start
+
+            step_time_start = time.time()
+            next_obs, rs, ds, infos = envs.step(cpu_action)
+            step_time += time.time() - step_time_start
+
+            next_obs = torch.Tensor(next_obs).to(device)
             rewards[step], next_done = torch.Tensor(rs).to(device), torch.Tensor(ds).to(device)
 
             for info in infos:
@@ -449,6 +457,8 @@ def on_evaluation_done(self, future):
                         writer.add_scalar(f"charts/episodic_return/{key}", info["microrts_stats"][key], global_step)
                     break
 
+
+        training_time_start =  time.time()
         # bootstrap reward if not done. reached the batch limit
         with torch.no_grad():
             last_value = agent.get_value(next_obs).reshape(1, -1)
@@ -559,6 +569,13 @@ def on_evaluation_done(self, future):
         if args.kle_stop or args.kle_rollback:
             writer.add_scalar("debug/pg_stop_iter", i_epoch_pi, global_step)
         writer.add_scalar("charts/sps", int(global_step / (time.time() - start_time)), global_step)
+        writer.add_scalar("charts/sps_step", int(args.num_envs * args.num_steps / step_time), global_step)
+        writer.add_scalar("charts/sps_inference", int(args.num_envs * args.num_steps / inference_time), global_step)
+        writer.add_scalar("charts/step_time", step_time, global_step)
+        writer.add_scalar("charts/inference_time", inference_time, global_step)
+        writer.add_scalar("charts/get_mask_time", get_mask_time, global_step)
+        writer.add_scalar("charts/rollout_time", training_time_start - rollout_time_start, global_step)  # rollout phase only: ends where training starts
+        writer.add_scalar("charts/training_time", time.time() - training_time_start, global_step)
         print("SPS:", int(global_step / (time.time() - start_time)))
 
     if eval_executor is not None:
diff --git a/experiments/ppo_gridnet_eval.py b/experiments/ppo_gridnet_eval.py
index eb55a39f..a7bd1913 100644
--- a/experiments/ppo_gridnet_eval.py
+++ b/experiments/ppo_gridnet_eval.py
@@ -53,7 +53,7 @@ def parse_args():
         help="the path to the agent's model")
     parser.add_argument('--ai', type=str, default="",
         help='the opponent AI to evaluate against')
-    parser.add_argument('--model-type', type=str, default=f"ppo_gridnet_large", choices=["ppo_gridnet_large", "ppo_gridnet"],
+    parser.add_argument('--model-type', type=str, default=f"ppo_gridnet", choices=["ppo_gridnet"],
         help='the output path of the leaderboard csv')
     args = parser.parse_args()
     if not args.seed:
@@ -72,16 +72,11 @@ def parse_args():
 if __name__ == "__main__":
     args = parse_args()
 
-    if args.model_type == "ppo_gridnet_large":
-        from ppo_gridnet_large import Agent, MicroRTSStatsRecorder
-
+    if args.model_type == "ppo_gridnet":
+        from ppo_gridnet import Agent, MicroRTSStatsRecorder
         from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv
     else:
-        from ppo_gridnet import Agent, MicroRTSStatsRecorder
-
-        from gym_microrts.envs.vec_env import (
-            MicroRTSGridModeSharedMemVecEnv as MicroRTSGridModeVecEnv,
-        )
+        raise ValueError(f"model_type {args.model_type} is not supported")
 
     # TRY NOT TO MODIFY: setup the environment
     experiment_name = f"{args.gym_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
diff --git a/experiments/ppo_gridnet_large.py b/experiments/ppo_gridnet_large.py
deleted file mode 100644
index 93b708ca..00000000
--- a/experiments/ppo_gridnet_large.py
+++ /dev/null
@@ -1,559 +0,0 @@
-# http://proceedings.mlr.press/v97/han19a/han19a.pdf
-
-import argparse
-import os
-import random
-import subprocess
-import time
-from concurrent.futures import ThreadPoolExecutor
-from distutils.util import strtobool
-
-import numpy as np
-import pandas as pd
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from gym.spaces import MultiDiscrete
-from stable_baselines3.common.vec_env import VecEnvWrapper, VecMonitor, VecVideoRecorder
-from torch.distributions.categorical import Categorical
-from torch.utils.tensorboard import SummaryWriter
-
-from gym_microrts import microrts_ai
-from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv
-
-
-def parse_args():
-    # fmt: off
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--exp-name', type=str, default=os.path.basename(__file__).rstrip(".py"),
-        help='the name of this experiment')
-    parser.add_argument('--gym-id', type=str, default="MicroRTSGridModeVecEnv",
-        help='the id of the gym environment')
-    parser.add_argument('--learning-rate', type=float, default=2.5e-4,
-        help='the learning rate of the optimizer')
-    parser.add_argument('--seed', type=int, default=1,
-        help='seed of the experiment')
-    parser.add_argument('--total-timesteps', type=int, default=300000000,
-        help='total timesteps of the experiments')
-    parser.add_argument('--torch-deterministic', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
-        help='if toggled, `torch.backends.cudnn.deterministic=False`')
-    parser.add_argument('--cuda', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
-        help='if toggled, cuda will not be enabled by default')
-    parser.add_argument('--prod-mode', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
-        help='run the script in production mode and use wandb to log outputs')
-    parser.add_argument('--capture-video', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
-        help='weather to capture videos of the agent performances (check out `videos` folder)')
-    parser.add_argument('--wandb-project-name', type=str, default="gym-microrts",
-        help="the wandb's project name")
-    parser.add_argument('--wandb-entity', type=str, default=None,
-        help="the entity (team) of wandb's project")
-
-    # Algorithm specific arguments
-    parser.add_argument('--partial-obs', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
-        help='if toggled, the game will have partial observability')
-    parser.add_argument('--n-minibatch', type=int, default=4,
-        help='the number of mini batch')
-    parser.add_argument('--num-bot-envs', type=int, default=0,
-        help='the number of bot game environment; 16 bot envs measn 16 games')
-    parser.add_argument('--num-selfplay-envs', type=int, default=24,
-        help='the number of self play envs; 16 self play envs means 8 games')
-    parser.add_argument('--num-steps', type=int, default=256,
-        help='the number of steps per game environment')
-    parser.add_argument('--gamma', type=float, default=0.99,
-        help='the discount factor gamma')
-    parser.add_argument('--gae-lambda', type=float, default=0.95,
-        help='the lambda for the general advantage estimation')
-    parser.add_argument('--ent-coef', type=float, default=0.01,
-        help="coefficient of the entropy")
-    parser.add_argument('--vf-coef', type=float, default=0.5,
-        help="coefficient of the value function")
-    parser.add_argument('--max-grad-norm', type=float, default=0.5,
-        help='the maximum norm for the gradient clipping')
-    parser.add_argument('--clip-coef', type=float, default=0.1,
-        help="the surrogate clipping coefficient")
-    parser.add_argument('--update-epochs', type=int, default=4,
-        help="the K epochs to update the policy")
-    parser.add_argument('--kle-stop', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
-        help='If toggled, the policy updates will be early stopped w.r.t target-kl')
-    parser.add_argument('--kle-rollback', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
-        help='If toggled, the policy updates will roll back to previous policy if KL exceeds target-kl')
-    parser.add_argument('--target-kl', type=float, default=0.03,
-        help='the target-kl variable that is referred by --kl')
-    parser.add_argument('--gae', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
-        help='Use GAE for advantage computation')
-    parser.add_argument('--norm-adv', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
-        help="Toggles advantages normalization")
-    parser.add_argument('--anneal-lr', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
-        help="Toggle learning rate annealing for policy and value networks")
-    parser.add_argument('--clip-vloss', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
-        help='Toggles whether or not to use a clipped loss for the value function, as per the paper.')
-    parser.add_argument('--num-models', type=int, default=200,
-        help='the number of models saved')
-    parser.add_argument('--max-eval-workers', type=int, default=2,
-        help='the maximum number of eval workers (skips evaluation when set to 0)')
-
-    args = parser.parse_args()
-    if not args.seed:
-        args.seed = int(time.time())
-    args.num_envs = args.num_selfplay_envs + args.num_bot_envs
-    args.batch_size = int(args.num_envs * args.num_steps)
-    args.minibatch_size = int(args.batch_size // args.n_minibatch)
-    args.num_updates = args.total_timesteps // args.batch_size
-    args.save_frequency = max(1, int(args.num_updates // args.num_models))
-    print(args.save_frequency)
-    # fmt: on
-    return args
-
-
-class MicroRTSStatsRecorder(VecEnvWrapper):
-    def __init__(self, env, gamma=0.99) -> None:
-        super().__init__(env)
-        self.gamma = gamma
-
-    def reset(self):
-        obs = self.venv.reset()
-        self.raw_rewards = [[] for _ in range(self.num_envs)]
-        self.ts = np.zeros(self.num_envs, dtype=np.float32)
-        self.raw_discount_rewards = [[] for _ in range(self.num_envs)]
-        return obs
-
-    def step_wait(self):
-        obs, rews, dones, infos = self.venv.step_wait()
-        newinfos = list(infos[:])
-        for i in range(len(dones)):
-            self.raw_rewards[i] += [infos[i]["raw_rewards"]]
-            self.raw_discount_rewards[i] += [
-                (self.gamma ** self.ts[i])
-                * np.concatenate((infos[i]["raw_rewards"], infos[i]["raw_rewards"].sum()), axis=None)
-            ]
-            self.ts[i] += 1
-            if dones[i]:
-                info = infos[i].copy()
-                raw_returns = np.array(self.raw_rewards[i]).sum(0)
-                raw_names = [str(rf) for rf in self.rfs]
-                raw_discount_returns = np.array(self.raw_discount_rewards[i]).sum(0)
-                raw_discount_names = ["discounted_" + str(rf) for rf in self.rfs] + ["discounted"]
-                info["microrts_stats"] = dict(zip(raw_names, raw_returns))
-                info["microrts_stats"].update(dict(zip(raw_discount_names, raw_discount_returns)))
-                self.raw_rewards[i] = []
-                self.raw_discount_rewards[i] = []
-                self.ts[i] = 0
-                newinfos[i] = info
-        return obs, rews, dones, newinfos
-
-
-# ALGO LOGIC: initialize agent here:
-class CategoricalMasked(Categorical):
-    def __init__(self, probs=None, logits=None, validate_args=None, masks=[], mask_value=None):
-        logits = torch.where(masks.bool(), logits, mask_value)
-        super(CategoricalMasked, self).__init__(probs, logits, validate_args)
-
-
-class Transpose(nn.Module):
-    def __init__(self, permutation):
-        super().__init__()
-        self.permutation = permutation
-
-    def forward(self, x):
-        return x.permute(self.permutation)
-
-
-def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
-    torch.nn.init.orthogonal_(layer.weight, std)
-    torch.nn.init.constant_(layer.bias, bias_const)
-    return layer
-
-
-class Agent(nn.Module):
-    def __init__(self, envs, mapsize=16 * 16):
-        super(Agent, self).__init__()
-        self.mapsize = mapsize
-        h, w, c = envs.observation_space.shape
-        self.encoder = nn.Sequential(
-            Transpose((0, 3, 1, 2)),
-            layer_init(nn.Conv2d(c, 32, kernel_size=3, padding=1)),
-            nn.MaxPool2d(3, stride=2, padding=1),
-            nn.ReLU(),
-            layer_init(nn.Conv2d(32, 64, kernel_size=3, padding=1)),
-            nn.MaxPool2d(3, stride=2, padding=1),
-            nn.ReLU(),
-            layer_init(nn.Conv2d(64, 128, kernel_size=3, padding=1)),
-            nn.MaxPool2d(3, stride=2, padding=1),
-            nn.ReLU(),
-            layer_init(nn.Conv2d(128, 256, kernel_size=3, padding=1)),
-            nn.MaxPool2d(3, stride=2, padding=1),
-        )
-
-        self.actor = nn.Sequential(
-            layer_init(nn.ConvTranspose2d(256, 128, 3, stride=2, padding=1, output_padding=1)),
-            nn.ReLU(),
-            layer_init(nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1)),
-            nn.ReLU(),
-            layer_init(nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1)),
-            nn.ReLU(),
-            layer_init(nn.ConvTranspose2d(32, 78, 3, stride=2, padding=1, output_padding=1)),
-            Transpose((0, 2, 3, 1)),
-        )
-        self.critic = nn.Sequential(
-            nn.Flatten(),
-            layer_init(nn.Linear(256, 128)),
-            nn.ReLU(),
-            layer_init(nn.Linear(128, 1), std=1),
-        )
-        self.register_buffer("mask_value", torch.tensor(-1e8))
-
-    def get_action_and_value(self, x, action=None, invalid_action_masks=None, envs=None, device=None):
-        hidden = self.encoder(x)
-        logits = self.actor(hidden)
-        grid_logits = logits.reshape(-1, envs.action_plane_space.nvec.sum())
-        split_logits = torch.split(grid_logits, envs.action_plane_space.nvec.tolist(), dim=1)
-
-        if action is None:
-            # invalid_action_masks = torch.tensor(np.array(envs.vec_client.getMasks(0))).to(device)
-            invalid_action_masks = invalid_action_masks.view(-1, invalid_action_masks.shape[-1])
-            split_invalid_action_masks = torch.split(invalid_action_masks, envs.action_plane_space.nvec.tolist(), dim=1)
-            multi_categoricals = [
-                CategoricalMasked(logits=logits, masks=iam, mask_value=self.mask_value)
-                for (logits, iam) in zip(split_logits, split_invalid_action_masks)
-            ]
-            action = torch.stack([categorical.sample() for categorical in multi_categoricals])
-        else:
-            invalid_action_masks = invalid_action_masks.view(-1, invalid_action_masks.shape[-1])
-            action = action.view(-1, action.shape[-1]).T
-            split_invalid_action_masks = torch.split(invalid_action_masks, envs.action_plane_space.nvec.tolist(), dim=1)
-            multi_categoricals = [
-                CategoricalMasked(logits=logits, masks=iam, mask_value=self.mask_value)
-                for (logits, iam) in zip(split_logits, split_invalid_action_masks)
-            ]
-        logprob = torch.stack([categorical.log_prob(a) for a, categorical in zip(action, multi_categoricals)])
-        entropy = torch.stack([categorical.entropy() for categorical in multi_categoricals])
-        num_predicted_parameters = len(envs.action_plane_space.nvec)
-        logprob = logprob.T.view(-1, self.mapsize, num_predicted_parameters)
-        entropy = entropy.T.view(-1, self.mapsize, num_predicted_parameters)
-        action = action.T.view(-1, self.mapsize, num_predicted_parameters)
-        return action, logprob.sum(1).sum(1), entropy.sum(1).sum(1), invalid_action_masks, self.critic(hidden)
-
-    def get_value(self, x):
-        return self.critic(self.encoder(x))
-
-
-def run_evaluation(model_path: str, output_path: str):
-    args = [
-        "python",
-        "league.py",
-        "--evals",
-        model_path,
-        "--update-db",
-        "false",
-        "--cuda",
-        "false",
-        "--output-path",
-        output_path,
-    ]
-    fd = subprocess.Popen(args)
-    print(f"Evaluating {model_path}")
-    return_code = fd.wait()
-    assert return_code == 0
-    return (model_path, output_path)
-
-
-class TrueskillWriter:
-    def __init__(self, prod_mode, writer, league_path: str, league_step_path: str):
-        self.prod_mode = prod_mode
-        self.writer = writer
-        self.trueskill_df = pd.read_csv(league_path)
-        self.trueskill_step_df = pd.read_csv(league_step_path)
-        self.trueskill_step_df["type"] = self.trueskill_step_df["name"]
-        self.trueskill_step_df["step"] = 0
-        # xxx(okachaiev): not sure we need this copy
-        self.preset_trueskill_step_df = self.trueskill_step_df.copy()
-
-    def on_evaluation_done(self, future):
-        if future.cancelled():
-            return
-        model_path, output_path = future.result()
-        league = pd.read_csv(output_path, index_col="name")
-        assert model_path in league.index
-        model_global_step = int(model_path.split("/")[-1][:-3])
-        self.writer.add_scalar("charts/trueskill", league.loc[model_path]["trueskill"], model_global_step)
-        print(f"global_step={model_global_step}, trueskill={league.loc[model_path]['trueskill']}")
-
-        # table visualization logic
-        if self.prod_mode:
-            trueskill_data = {
-                "name": league.loc[model_path].name,
-                "mu": league.loc[model_path]["mu"],
-                "sigma": league.loc[model_path]["sigma"],
-                "trueskill": league.loc[model_path]["trueskill"],
-            }
-            self.trueskill_df = self.trueskill_df.append(trueskill_data, ignore_index=True)
-            wandb.log({"trueskill": wandb.Table(dataframe=self.trueskill_df)})
-            trueskill_data["type"] = "training"
-            trueskill_data["step"] = model_global_step
-            self.trueskill_step_df = self.trueskill_step_df.append(trueskill_data, ignore_index=True)
-            preset_trueskill_step_df_clone = self.preset_trueskill_step_df.copy()
-            preset_trueskill_step_df_clone["step"] = model_global_step
-            self.trueskill_step_df = self.trueskill_step_df.append(preset_trueskill_step_df_clone, ignore_index=True)
-            wandb.log({"trueskill_step": wandb.Table(dataframe=self.trueskill_step_df)})
-
-
-if __name__ == "__main__":
-    args = parse_args()
-
-    # TRY NOT TO MODIFY: setup the environment
-    experiment_name = f"{args.gym_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
-    if args.prod_mode:
-        import wandb
-
-        run = wandb.init(
-            project=args.wandb_project_name,
-            entity=args.wandb_entity,
-            # sync_tensorboard=True,
-            config=vars(args),
-            name=experiment_name,
-            monitor_gym=True,
-            save_code=True,
-        )
-        wandb.tensorboard.patch(save=False)
-        CHECKPOINT_FREQUENCY = 50
-    writer = SummaryWriter(f"runs/{experiment_name}")
-    writer.add_text(
-        "hyperparameters", "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()]))
-    )
-
-    # TRY NOT TO MODIFY: seeding
-    device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
-    random.seed(args.seed)
-    np.random.seed(args.seed)
-    torch.manual_seed(args.seed)
-    torch.backends.cudnn.deterministic = args.torch_deterministic
-    envs = MicroRTSGridModeVecEnv(
-        num_selfplay_envs=args.num_selfplay_envs,
-        num_bot_envs=args.num_bot_envs,
-        partial_obs=args.partial_obs,
-        max_steps=2000,
-        render_theme=2,
-        ai2s=[microrts_ai.coacAI for _ in range(args.num_bot_envs - 6)]
-        + [microrts_ai.randomBiasedAI for _ in range(min(args.num_bot_envs, 2))]
-        + [microrts_ai.lightRushAI for _ in range(min(args.num_bot_envs, 2))]
-        + [microrts_ai.workerRushAI for _ in range(min(args.num_bot_envs, 2))],
-        map_paths=["maps/16x16/basesWorkers16x16.xml"],
-        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
-    )
-    envs = MicroRTSStatsRecorder(envs, args.gamma)
-    envs = VecMonitor(envs)
-    if args.capture_video:
-        envs = VecVideoRecorder(
-            envs, f"videos/{experiment_name}", record_video_trigger=lambda x: x % 100000 == 0, video_length=2000
-        )
-    assert isinstance(envs.action_space, MultiDiscrete), "only MultiDiscrete action space is supported"
-
-    eval_executor = None
-    if args.max_eval_workers > 0:
-        eval_executor = ThreadPoolExecutor(max_workers=args.max_eval_workers, thread_name_prefix="league-eval-")
-
-    agent = Agent(envs).to(device)
-    optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5)
-    if args.anneal_lr:
-        # https://github.com/openai/baselines/blob/ea25b9e8b234e6ee1bca43083f8f3cf974143998/baselines/ppo2/defaults.py#L20
-        lr = lambda f: f * args.learning_rate
-
-    # ALGO Logic: Storage for epoch data
-    mapsize = 16 * 16
-    action_space_shape = (mapsize, len(envs.action_plane_space.nvec))
-    invalid_action_shape = (mapsize, envs.action_plane_space.nvec.sum())
-
-    obs = torch.zeros((args.num_steps, args.num_envs) + envs.observation_space.shape).to(device)
-    actions = torch.zeros((args.num_steps, args.num_envs) + action_space_shape).to(device)
-    logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device)
-    rewards = torch.zeros((args.num_steps, args.num_envs)).to(device)
-    dones = torch.zeros((args.num_steps, args.num_envs)).to(device)
-    values = torch.zeros((args.num_steps, args.num_envs)).to(device)
-    invalid_action_masks = torch.zeros((args.num_steps, args.num_envs) + invalid_action_shape).to(device)
-    # TRY NOT TO MODIFY: start the game
-    global_step = 0
-    start_time = time.time()
-    # Note how `next_obs` and `next_done` are used; their usage is equivalent to
-    # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/84a7582477fb0d5c82ad6d850fe476829dddd2e1/a2c_ppo_acktr/storage.py#L60
-    next_obs = torch.Tensor(envs.reset()).to(device)
-    next_done = torch.zeros(args.num_envs).to(device)
-
-    ## CRASH AND RESUME LOGIC:
-    starting_update = 1
-
-    if args.prod_mode and wandb.run.resumed:
-        starting_update = run.summary.get("charts/update") + 1
-        global_step = starting_update * args.batch_size
-        api = wandb.Api()
-        run = api.run(f"{run.entity}/{run.project}/{run.id}")
-        model = run.file("agent.pt")
-        model.download(f"models/{experiment_name}/")
-        agent.load_state_dict(torch.load(f"models/{experiment_name}/agent.pt", map_location=device))
-        agent.eval()
-        print(f"resumed at update {starting_update}")
-
-    print("Model's state_dict:")
-    for param_tensor in agent.state_dict():
-        print(param_tensor, "\t", agent.state_dict()[param_tensor].size())
-    total_params = sum([param.nelement() for param in agent.parameters()])
-    print("Model's total parameters:", total_params)
-
-    ## EVALUATION LOGIC:
-    trueskill_writer = TrueskillWriter(
-        args.prod_mode, writer, "gym-microrts-static-files/league.csv", "gym-microrts-static-files/league.csv"
-    )
-
-    for update in range(starting_update, args.num_updates + 1):
-        # Annealing the rate if instructed to do so.
-        if args.anneal_lr:
-            frac = 1.0 - (update - 1.0) / args.num_updates
-            lrnow = lr(frac)
-            optimizer.param_groups[0]["lr"] = lrnow
-
-        # TRY NOT TO MODIFY: prepare the execution of the game.
-        for step in range(0, args.num_steps):
-            # envs.render()
-            global_step += 1 * args.num_envs
-            obs[step] = next_obs
-            dones[step] = next_done
-            # ALGO LOGIC: put action logic here
-            with torch.no_grad():
-                invalid_action_masks[step] = torch.tensor(np.array(envs.get_action_mask())).to(device)
-                action, logproba, _, _, vs = agent.get_action_and_value(
-                    next_obs, envs=envs, invalid_action_masks=invalid_action_masks[step], device=device
-                )
-                values[step] = vs.flatten()
-
-            actions[step] = action
-            logprobs[step] = logproba
-            try:
-                next_obs, rs, ds, infos = envs.step(action.cpu().numpy().reshape(envs.num_envs, -1))
-                next_obs = torch.Tensor(next_obs).to(device)
-            except Exception as e:
-                e.printStackTrace()
-                raise
-            rewards[step], next_done = torch.Tensor(rs).to(device), torch.Tensor(ds).to(device)
-
-            for info in infos:
-                if "episode" in info.keys():
-                    print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
-                    writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
-                    writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
-                    for key in info["microrts_stats"]:
-                        writer.add_scalar(f"charts/episodic_return/{key}", info["microrts_stats"][key], global_step)
-                    break
-
-        # bootstrap reward if not done. reached the batch limit
-        with torch.no_grad():
-            last_value = agent.get_value(next_obs.to(device)).reshape(1, -1)
-            if args.gae:
-                advantages = torch.zeros_like(rewards).to(device)
-                lastgaelam = 0
-                for t in reversed(range(args.num_steps)):
-                    if t == args.num_steps - 1:
-                        nextnonterminal = 1.0 - next_done
-                        nextvalues = last_value
-                    else:
-                        nextnonterminal = 1.0 - dones[t + 1]
-                        nextvalues = values[t + 1]
-                    delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
-                    advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam
-                returns = advantages + values
-            else:
-                returns = torch.zeros_like(rewards).to(device)
-                for t in reversed(range(args.num_steps)):
-                    if t == args.num_steps - 1:
-                        nextnonterminal = 1.0 - next_done
-                        next_return = last_value
-                    else:
-                        nextnonterminal = 1.0 - dones[t + 1]
-                        next_return = returns[t + 1]
-                    returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
-                advantages = returns - values
-
-        # flatten the batch
-        b_obs = obs.reshape((-1,) + envs.observation_space.shape)
-        b_logprobs = logprobs.reshape(-1)
-        b_actions = actions.reshape((-1,) + action_space_shape)
-        b_advantages = advantages.reshape(-1)
-        b_returns = returns.reshape(-1)
-        b_values = values.reshape(-1)
-        b_invalid_action_masks = invalid_action_masks.reshape((-1,) + invalid_action_shape)
-
-        # Optimizing the policy and value network
-        inds = np.arange(
-            args.batch_size,
-        )
-        for i_epoch_pi in range(args.update_epochs):
-            np.random.shuffle(inds)
-            for start in range(0, args.batch_size, args.minibatch_size):
-                end = start + args.minibatch_size
-                minibatch_ind = inds[start:end]
-                mb_advantages = b_advantages[minibatch_ind]
-                if args.norm_adv:
-                    mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8)
-                _, newlogproba, entropy, _, new_values = agent.get_action_and_value(
-                    b_obs[minibatch_ind], b_actions.long()[minibatch_ind], b_invalid_action_masks[minibatch_ind], envs, device
-                )
-                ratio = (newlogproba - b_logprobs[minibatch_ind]).exp()
-
-                # Stats
-                approx_kl = (b_logprobs[minibatch_ind] - newlogproba).mean()
-
-                # Policy loss
-                pg_loss1 = -mb_advantages * ratio
-                pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + args.clip_coef)
-                pg_loss = torch.max(pg_loss1, pg_loss2).mean()
-                entropy_loss = entropy.mean()
-
-                # Value loss
-                new_values = new_values.view(-1)
-                if args.clip_vloss:
-                    v_loss_unclipped = (new_values - b_returns[minibatch_ind]) ** 2
-                    v_clipped = b_values[minibatch_ind] + torch.clamp(
-                        new_values - b_values[minibatch_ind], -args.clip_coef, args.clip_coef
-                    )
-                    v_loss_clipped = (v_clipped - b_returns[minibatch_ind]) ** 2
-                    v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped)
-                    v_loss = 0.5 * v_loss_max.mean()
-                else:
-                    v_loss = 0.5 * ((new_values - b_returns[minibatch_ind]) ** 2)
-
-                loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef
-
-                optimizer.zero_grad()
-                loss.backward()
-                nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
-                optimizer.step()
-
-        if (update - 1) % args.save_frequency == 0:
-            if not os.path.exists(f"models/{experiment_name}"):
-                os.makedirs(f"models/{experiment_name}")
-            torch.save(agent.state_dict(), f"models/{experiment_name}/agent.pt")
-            torch.save(agent.state_dict(), f"models/{experiment_name}/{global_step}.pt")
-            if args.prod_mode:
-                wandb.save(f"models/{experiment_name}/agent.pt", base_path=f"models/{experiment_name}", policy="now")
-            if eval_executor is not None:
-                future = eval_executor.submit(
-                    run_evaluation, f"models/{experiment_name}/{global_step}.pt", f"runs/{experiment_name}/{global_step}.csv"
-                )
-                print(f"Queued models/{experiment_name}/{global_step}.pt")
-                future.add_done_callback(trueskill_writer.on_evaluation_done)
-
-        # TRY NOT TO MODIFY: record rewards for plotting purposes
-        writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step)
-        writer.add_scalar("charts/update", update, global_step)
-        writer.add_scalar("losses/value_loss", v_loss.item(), global_step)
-        writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step)
-        writer.add_scalar("losses/entropy", entropy.mean().item(), global_step)
-        writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step)
-        if args.kle_stop or args.kle_rollback:
-            writer.add_scalar("debug/pg_stop_iter", i_epoch_pi, global_step)
-        writer.add_scalar("charts/sps", int(global_step / (time.time() - start_time)), global_step)
-        print("SPS:", int(global_step / (time.time() - start_time)))
-
-    if eval_executor is not None:
-        # shutdown pool of threads but make sure we finished scheduled evaluations
-        eval_executor.shutdown(wait=True, cancel_futures=False)
-    envs.close()
-    writer.close()
diff --git a/gym_microrts/envs/vec_env.py b/gym_microrts/envs/vec_env.py
index 930f60de..8dffdc07 100644
--- a/gym_microrts/envs/vec_env.py
+++ b/gym_microrts/envs/vec_env.py
@@ -52,6 +52,7 @@ def __init__(
         map_paths=["maps/10x10/basesTwoWorkers10x10.xml"],
         reward_weight=np.array([0.0, 1.0, 0.0, 0.0, 0.0, 5.0]),
         cycle_maps=[],
+        autobuild_microrts=True,
     ):
 
         self.num_selfplay_envs = num_selfplay_envs
@@ -81,13 +82,15 @@ def __init__(
         if not os.path.exists(f"{self.microrts_path}/README.md"):
             print(MICRORTS_CLONE_MESSAGE)
             os.system(f"git submodule update --init --recursive")
-        print(f"removing {self.microrts_path}/microrts.jar...")
-        if os.path.exists(f"{self.microrts_path}/microrts.jar"):
-            os.remove(f"{self.microrts_path}/microrts.jar")
-        print(f"building {self.microrts_path}/microrts.jar...")
 
-        # call the build script at the microrts folder
-        subprocess.run(["bash", "build.sh", "&>", "build.log"], cwd=f"{self.microrts_path}")
+        if autobuild_microrts:
+            print(f"removing {self.microrts_path}/microrts.jar...")
+            if os.path.exists(f"{self.microrts_path}/microrts.jar"):
+                os.remove(f"{self.microrts_path}/microrts.jar")
+            print(f"building {self.microrts_path}/microrts.jar...")
+
+            # call the build script at the microrts folder
+            subprocess.run(["bash", "build.sh", "&>", "build.log"], cwd=f"{self.microrts_path}")
 
         # read map
         root = ET.parse(os.path.join(self.microrts_path, self.map_paths[0])).getroot()
@@ -141,11 +144,10 @@ def __init__(
 
         # computed properties
         # [num_planes_hp(5), num_planes_resources(5), num_planes_player(5),
-        # num_planes_unit_type(z), num_planes_unit_action(6)]
-
-        self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6]
+        # num_planes_unit_type(z), num_planes_unit_action(6), num_planes_terrain(2)]
+        self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6, 2]
         if partial_obs:
-            self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6, 2]
+            self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6, 2] # two extra planes for visibility # TODO
         self.observation_space = gym.spaces.Box(
             low=0.0, high=1.0, shape=(self.height, self.width, sum(self.num_planes)), dtype=np.int32
         )
diff --git a/gym_microrts/microrts b/gym_microrts/microrts
index 4087c7e7..943912d5 160000
--- a/gym_microrts/microrts
+++ b/gym_microrts/microrts
@@ -1 +1 @@
-Subproject commit 4087c7e7c15532b9cc90238729c5826b08043a65
+Subproject commit 943912d565791fa7b3fc2f58eb612487a8508e6b
diff --git a/hello_world.py b/hello_world.py
index c1770fed..900819d7 100644
--- a/hello_world.py
+++ b/hello_world.py
@@ -13,7 +13,7 @@
     max_steps=2000,
     render_theme=2,
     ai2s=[microrts_ai.coacAI for _ in range(1)],
-    map_paths=["maps/16x16/basesWorkers16x16.xml"],
+    map_paths=["maps/16x16/basesWorkers16x16Bwall.xml"],
     reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
 )
 # envs = VecVideoRecorder(envs, 'videos', record_video_trigger=lambda x: x % 4000 == 0, video_length=2000)
diff --git a/tests/test_observation.py b/tests/test_observation.py
index 29c311ba..c72e8a6a 100644
--- a/tests/test_observation.py
+++ b/tests/test_observation.py
@@ -24,21 +24,24 @@ def test_observation():
         0., 0., 0., 0., 1., # >= 4 resources
         1., 0., 0.,         # no owner
         0., 1., 0., 0., 0., 0., 0., 0.,  # unit type resource
-        1., 0., 0., 0., 0., 0.  # currently not executing actions
+        1., 0., 0., 0., 0., 0.,  # currently not executing actions
+        1., 0.,         # terrain: TERRAIN_NONE
     ]).astype(np.int32)
     p1_worker = np.array([
         0., 1., 0., 0., 0., # 1 hp
         1., 0., 0., 0., 0., # 0 resources
         0., 1., 0.,         # player 1 owns it 
         0., 0., 0., 0., 1., 0., 0., 0., # unit type worker
-        1., 0., 0., 0., 0., 0. # currently not executing actions
+        1., 0., 0., 0., 0., 0., # currently not executing actions
+        1., 0.,         # terrain: TERRAIN_NONE
     ]).astype(np.int32)
     p1_base = np.array([
         0., 0., 0., 0., 1.,  # 1 hp
         1., 0., 0., 0., 0.,  # 0 resources
         0., 1., 0.,          # player 1 owns it
         0., 0., 1., 0., 0., 0., 0., 0., # unit type base
-        1., 0., 0., 0., 0., 0. # currently not executing actions
+        1., 0., 0., 0., 0., 0., # currently not executing actions
+        1., 0.,         # terrain: TERRAIN_NONE
     ]).astype(np.int32)
     p2_worker = p1_worker.copy()
     p2_worker[10:13] = np.array([0., 0., 1.,]) # player 2 owns it
@@ -49,7 +52,8 @@ def test_observation():
         1., 0., 0., 0., 0.,  # 0 resources
         1., 0., 0.,          # no owner
         1., 0., 0., 0., 0., 0., 0., 0., # unit type empty cell
-        1., 0., 0., 0., 0., 0. # currently not executing actions
+        1., 0., 0., 0., 0., 0., # currently not executing actions
+        1., 0.,         # terrain: TERRAIN_NONE
     ]).astype(np.int32)
     # fmt: on
 
@@ -63,14 +67,13 @@ def test_observation():
     np.testing.assert_array_equal(next_obs[0][14][14], p2_worker)
     np.testing.assert_array_equal(next_obs[0][13][13], p2_base)
 
-    # TODO: fix this BUG
     # player 2's perspective (self play)
-    # np.testing.assert_array_equal(next_obs[1][0][0], resource) # BUG: in `MicroRTSGridModeVecEnv` the onwer is correctly set to [0, 1, 0]
-    # np.testing.assert_array_equal(next_obs[1][1][0], resource) # BUG: in `MicroRTSGridModeVecEnv` the onwer is correctly set to [0, 1, 0]
+    np.testing.assert_array_equal(next_obs[1][0][0], resource)
+    np.testing.assert_array_equal(next_obs[1][1][0], resource)
     np.testing.assert_array_equal(next_obs[1][1][1], p2_worker)
     np.testing.assert_array_equal(next_obs[1][2][2], p2_base)
-    # np.testing.assert_array_equal(next_obs[1][15][15], resource) # BUG: in `MicroRTSGridModeVecEnv` the onwer is correctly set to [0, 1, 0]
-    # np.testing.assert_array_equal(next_obs[1][14][15], resource) # BUG: in `MicroRTSGridModeVecEnv` the onwer is correctly set to [0, 1, 0]
+    np.testing.assert_array_equal(next_obs[1][15][15], resource)
+    np.testing.assert_array_equal(next_obs[1][14][15], resource)
     np.testing.assert_array_equal(next_obs[1][14][14], p1_worker)
     np.testing.assert_array_equal(next_obs[1][13][13], p1_base)
 
@@ -78,4 +81,30 @@ def test_observation():
     for item in [resource, resource, p1_worker, p1_base, resource, resource, p2_worker, p2_base]:
         feature_sum += item.sum()
     feature_sum += empty_cell.sum() * (256 - 8)
-    assert next_obs.sum() == feature_sum * 2 == 2560.0
+    assert next_obs.sum() == feature_sum * 2 == 3072.0
+
+    # test observation with walls
+    envs = MicroRTSGridModeVecEnv(
+        num_bot_envs=0,
+        num_selfplay_envs=2,
+        partial_obs=False,
+        max_steps=5000,
+        render_theme=2,
+        ai2s=[],
+        map_paths=["maps/16x16/basesWorkers16x16Bwall.xml"],
+        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
+    )
+    # fmt: off
+    wall = np.array([
+        1., 0., 0., 0., 0., # 0 hp
+        1., 0., 0., 0., 0., # 0 resources
+        1., 0., 0.,         # no owner
+        1., 0., 0., 0., 0., 0., 0., 0.,  # unit type `-`
+        1., 0., 0., 0., 0., 0.,  # currently not executing actions
+        0., 1.,         # terrain: TERRAIN_WALL
+    ]).astype(np.int32)
+    # fmt: on
+    next_obs = envs.reset()
+    print(next_obs[0][0][0], wall)
+    np.testing.assert_array_equal(next_obs[0][0][0], wall)
+

From 5e1be7f033b0448e1bdc0b8ec660642b59f9e795 Mon Sep 17 00:00:00 2001
From: Costa Huang <costa.huang@outlook.com>
Date: Wed, 16 Nov 2022 17:40:00 -0500
Subject: [PATCH 02/10] Remove eval test cases until we train a new model

---
 tests/test_e2e.py | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/tests/test_e2e.py b/tests/test_e2e.py
index 0746b538..d1a07785 100644
--- a/tests/test_e2e.py
+++ b/tests/test_e2e.py
@@ -14,22 +14,22 @@ def test_ppo_gridnet():
         assert grepexc.returncode in [0, 134]
 
 
-def test_ppo_gridnet_eval_selfplay():
-    try:
-        subprocess.run(
-            "cd experiments; python ppo_gridnet_eval.py --num-steps 16 --total-timesteps 32 --cuda False",
-            shell=True,
-            check=True,
-        )
-    except subprocess.CalledProcessError as grepexc:
-        print("error code", grepexc.returncode, grepexc.output)
-        assert grepexc.returncode in [0, 134]
-
-
-def test_ppo_gridnet_eval_bot():
-
-    subprocess.run(
-        "cd experiments; python ppo_gridnet_eval.py --ai coacAI --num-steps 16 --total-timesteps 32 --cuda False",
-        shell=True,
-        check=True,
-    )
+# def test_ppo_gridnet_eval_selfplay():
+#     try:
+#         subprocess.run(
+#             "cd experiments; python ppo_gridnet_eval.py --num-steps 16 --total-timesteps 32 --cuda False",
+#             shell=True,
+#             check=True,
+#         )
+#     except subprocess.CalledProcessError as grepexc:
+#         print("error code", grepexc.returncode, grepexc.output)
+#         assert grepexc.returncode in [0, 134]
+
+
+# def test_ppo_gridnet_eval_bot():
+
+#     subprocess.run(
+#         "cd experiments; python ppo_gridnet_eval.py --ai coacAI --num-steps 16 --total-timesteps 32 --cuda False",
+#         shell=True,
+#         check=True,
+#     )

From 593b19dfc5e48c5cc2b2fc6d78850be30284c8c3 Mon Sep 17 00:00:00 2001
From: Costa Huang <costa.huang@outlook.com>
Date: Wed, 16 Nov 2022 17:40:39 -0500
Subject: [PATCH 03/10] pre-commit

---
 experiments/league.py           | 5 +++--
 experiments/ppo_gridnet.py      | 3 +--
 experiments/ppo_gridnet_eval.py | 1 +
 gym_microrts/envs/vec_env.py    | 2 +-
 tests/test_observation.py       | 1 -
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/experiments/league.py b/experiments/league.py
index 4c64ed75..fd89f871 100644
--- a/experiments/league.py
+++ b/experiments/league.py
@@ -85,12 +85,13 @@ def parse_args():
 
 if args.model_type == "ppo_gridnet":
     from ppo_gridnet import Agent, MicroRTSStatsRecorder
-    from gym_microrts.envs.vec_env import MicroRTSBotVecEnv
-    from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv
+
+    from gym_microrts.envs.vec_env import MicroRTSBotVecEnv, MicroRTSGridModeVecEnv
 
 else:
     raise ValueError(f"model_type {args.model_type} is not supported")
 
+
 class BaseModel(Model):
     class Meta:
         database = db
diff --git a/experiments/ppo_gridnet.py b/experiments/ppo_gridnet.py
index befb0e7e..9bbb3f79 100644
--- a/experiments/ppo_gridnet.py
+++ b/experiments/ppo_gridnet.py
@@ -457,8 +457,7 @@ def on_evaluation_done(self, future):
                         writer.add_scalar(f"charts/episodic_return/{key}", info["microrts_stats"][key], global_step)
                     break
 
-
-        training_time_start =  time.time()
+        training_time_start = time.time()
         # bootstrap reward if not done. reached the batch limit
         with torch.no_grad():
             last_value = agent.get_value(next_obs).reshape(1, -1)
diff --git a/experiments/ppo_gridnet_eval.py b/experiments/ppo_gridnet_eval.py
index a7bd1913..4b67964a 100644
--- a/experiments/ppo_gridnet_eval.py
+++ b/experiments/ppo_gridnet_eval.py
@@ -74,6 +74,7 @@ def parse_args():
 
     if args.model_type == "ppo_gridnet":
         from ppo_gridnet import Agent, MicroRTSStatsRecorder
+
         from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv
     else:
         raise ValueError(f"model_type {args.model_type} is not supported")
diff --git a/gym_microrts/envs/vec_env.py b/gym_microrts/envs/vec_env.py
index 8dffdc07..b90dcde9 100644
--- a/gym_microrts/envs/vec_env.py
+++ b/gym_microrts/envs/vec_env.py
@@ -147,7 +147,7 @@ def __init__(
         # num_planes_unit_type(z), num_planes_unit_action(6), num_planes_terrain(2)]
         self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6, 2]
         if partial_obs:
-            self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6, 2] # two extra planes for visibility # TODO
+            self.num_planes = [5, 5, 3, len(self.utt["unitTypes"]) + 1, 6, 2]  # two extra planes for visibility # TODO
         self.observation_space = gym.spaces.Box(
             low=0.0, high=1.0, shape=(self.height, self.width, sum(self.num_planes)), dtype=np.int32
         )
diff --git a/tests/test_observation.py b/tests/test_observation.py
index c72e8a6a..813f1e86 100644
--- a/tests/test_observation.py
+++ b/tests/test_observation.py
@@ -107,4 +107,3 @@ def test_observation():
     next_obs = envs.reset()
     print(next_obs[0][0][0], wall)
     np.testing.assert_array_equal(next_obs[0][0][0], wall)
-

From 119768bb5755b0d5a19cad4bdcef11055c974fb6 Mon Sep 17 00:00:00 2001
From: Costa Huang <costa.huang@outlook.com>
Date: Wed, 16 Nov 2022 17:44:24 -0500
Subject: [PATCH 04/10] Do not rebuild gym-microrts during training

---
 experiments/league.py               |   3 +
 experiments/ppo_gridnet_multigpu.py | 635 ++++++++++++++++++++++++++++
 gym_microrts/envs/vec_env.py        |  16 +-
 3 files changed, 648 insertions(+), 6 deletions(-)
 create mode 100644 experiments/ppo_gridnet_multigpu.py

diff --git a/experiments/league.py b/experiments/league.py
index fd89f871..c38b2546 100644
--- a/experiments/league.py
+++ b/experiments/league.py
@@ -185,6 +185,7 @@ def __init__(self, partial_obs: bool, match_up=None, map_path="maps/16x16/basesW
                 ai2s=built_in_ais,
                 map_paths=[map_path],
                 reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
+                autobuild_microrts=False,
             )
             self.agent = Agent(self.envs).to(self.device)
             self.agent.load_state_dict(torch.load(self.rl_ai, map_location=self.device))
@@ -198,6 +199,7 @@ def __init__(self, partial_obs: bool, match_up=None, map_path="maps/16x16/basesW
                 render_theme=2,
                 map_paths=[map_path],
                 reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
+                autobuild_microrts=False,
             )
             self.agent = Agent(self.envs).to(self.device)
             self.agent.load_state_dict(torch.load(self.rl_ai, map_location=self.device))
@@ -213,6 +215,7 @@ def __init__(self, partial_obs: bool, match_up=None, map_path="maps/16x16/basesW
                 render_theme=2,
                 map_paths=[map_path],
                 reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
+                autobuild_microrts=False,
             )
         self.envs = MicroRTSStatsRecorder(self.envs)
         self.envs = VecMonitor(self.envs)
diff --git a/experiments/ppo_gridnet_multigpu.py b/experiments/ppo_gridnet_multigpu.py
new file mode 100644
index 00000000..3ef66c9d
--- /dev/null
+++ b/experiments/ppo_gridnet_multigpu.py
@@ -0,0 +1,635 @@
+# http://proceedings.mlr.press/v97/han19a/han19a.pdf
+
+import argparse
+import os
+import random
+import subprocess
+import time
+import warnings
+from distutils.util import strtobool
+from typing import List
+
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+import torch.distributed as dist
+import torch.optim as optim
+from gym.spaces import MultiDiscrete
+from stable_baselines3.common.vec_env import VecEnvWrapper, VecMonitor, VecVideoRecorder
+from torch.distributions.categorical import Categorical
+from torch.utils.tensorboard import SummaryWriter
+
+from gym_microrts import microrts_ai
+from gym_microrts.envs.vec_env import MicroRTSGridModeVecEnv
+
+
+def parse_args():
+    """Parse the command-line options and derive the batch/update sizes used by training."""
+    # fmt: off
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--exp-name', type=str, default=os.path.splitext(os.path.basename(__file__))[0],
+        help='the name of this experiment')  # splitext: rstrip(".py") strips a char set, not the suffix
+    parser.add_argument('--gym-id', type=str, default="MicroRTSGridModeVecEnv",
+        help='the id of the gym environment')
+    parser.add_argument('--learning-rate', type=float, default=2.5e-4,
+        help='the learning rate of the optimizer')
+    parser.add_argument('--seed', type=int, default=1, help='seed of the experiment')
+    parser.add_argument('--total-timesteps', type=int, default=50000000,
+        help='total timesteps of the experiments')
+    parser.add_argument('--torch-deterministic', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
+        help='if toggled, `torch.backends.cudnn.deterministic=False`')
+    parser.add_argument('--cuda', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
+        help='if toggled, cuda will not be enabled by default')
+    parser.add_argument('--prod-mode', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
+        help='run the script in production mode and use wandb to log outputs')
+    parser.add_argument('--capture-video', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
+        help='whether to capture videos of the agent performances (check out `videos` folder)')
+    parser.add_argument('--wandb-project-name', type=str, default="gym-microrts",
+        help="the wandb's project name")
+    parser.add_argument('--wandb-entity', type=str, default=None,
+        help="the entity (team) of wandb's project")
+
+    # Algorithm specific arguments
+    parser.add_argument('--partial-obs', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
+        help='if toggled, the game will have partial observability')
+    parser.add_argument('--n-minibatch', type=int, default=4,
+        help='the number of mini batch')
+    parser.add_argument('--num-bot-envs', type=int, default=0,
+        help='the number of bot game environment; 16 bot envs means 16 games')
+    parser.add_argument('--num-selfplay-envs', type=int, default=24,
+        help='the number of self play envs; 16 self play envs means 8 games')
+    parser.add_argument('--num-steps', type=int, default=256,
+        help='the number of steps per game environment')
+    parser.add_argument('--gamma', type=float, default=0.99,
+        help='the discount factor gamma')
+    parser.add_argument('--gae-lambda', type=float, default=0.95,
+        help='the lambda for the general advantage estimation')
+    parser.add_argument('--ent-coef', type=float, default=0.01,
+        help="coefficient of the entropy")
+    parser.add_argument('--vf-coef', type=float, default=0.5,
+        help="coefficient of the value function")
+    parser.add_argument('--max-grad-norm', type=float, default=0.5,
+        help='the maximum norm for the gradient clipping')
+    parser.add_argument('--clip-coef', type=float, default=0.1,
+        help="the surrogate clipping coefficient")
+    parser.add_argument('--update-epochs', type=int, default=4,
+        help="the K epochs to update the policy")
+    parser.add_argument('--kle-stop', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
+        help='If toggled, the policy updates will be early stopped w.r.t target-kl')
+    parser.add_argument('--kle-rollback', type=lambda x: bool(strtobool(x)), default=False, nargs='?', const=True,
+        help='If toggled, the policy updates will roll back to previous policy if KL exceeds target-kl')
+    parser.add_argument('--target-kl', type=float, default=0.03,
+        help='the target-kl variable that is referred by --kle-stop and --kle-rollback')
+    parser.add_argument('--gae', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
+        help='Use GAE for advantage computation')
+    parser.add_argument('--norm-adv', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
+        help="Toggles advantages normalization")
+    parser.add_argument('--anneal-lr', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
+        help="Toggle learning rate annealing for policy and value networks")
+    parser.add_argument('--clip-vloss', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
+        help='Toggles whether or not to use a clipped loss for the value function, as per the paper.')
+    parser.add_argument('--num-models', type=int, default=100,
+        help='the number of models saved')
+    parser.add_argument('--max-eval-workers', type=int, default=4,
+        help='the maximum number of eval workers (skips evaluation when set to 0)')
+    parser.add_argument('--train-maps', nargs='+', default=["maps/16x16/basesWorkers16x16A.xml"],
+        help='the list of maps used during training')
+    parser.add_argument('--eval-maps', nargs='+', default=["maps/16x16/basesWorkers16x16A.xml"],
+        help='the list of maps used during evaluation')
+
+    # multi-GPU arguments
+    parser.add_argument("--device-ids", nargs="+", default=[],
+        help="the device ids that subprocess workers will use")
+    parser.add_argument("--backend", type=str, default="gloo", choices=["gloo", "nccl", "mpi"],
+        help="the distributed communication backend")
+    args = parser.parse_args()
+    if not args.seed:  # a seed of 0 means: derive one from the clock
+        args.seed = int(time.time())
+    args.num_envs = args.num_selfplay_envs + args.num_bot_envs
+    args.batch_size = int(args.num_envs * args.num_steps)
+    args.minibatch_size = int(args.batch_size // args.n_minibatch)
+    args.num_updates = args.total_timesteps // args.batch_size
+    args.save_frequency = max(1, int(args.num_updates // args.num_models))
+    # fmt: on
+    return args
+
+
+class MicroRTSStatsRecorder(VecEnvWrapper):
+    def __init__(self, env, gamma=0.99) -> None:
+        super().__init__(env)
+        self.gamma = gamma
+
+    def reset(self):
+        obs = self.venv.reset()
+        self.raw_rewards = [[] for _ in range(self.num_envs)]
+        self.ts = np.zeros(self.num_envs, dtype=np.float32)
+        self.raw_discount_rewards = [[] for _ in range(self.num_envs)]
+        return obs
+
+    def step_wait(self):
+        obs, rews, dones, infos = self.venv.step_wait()
+        newinfos = list(infos[:])
+        for i in range(len(dones)):
+            self.raw_rewards[i] += [infos[i]["raw_rewards"]]
+            self.raw_discount_rewards[i] += [
+                (self.gamma ** self.ts[i])
+                * np.concatenate((infos[i]["raw_rewards"], infos[i]["raw_rewards"].sum()), axis=None)
+            ]
+            self.ts[i] += 1
+            if dones[i]:
+                info = infos[i].copy()
+                raw_returns = np.array(self.raw_rewards[i]).sum(0)
+                raw_names = [str(rf) for rf in self.rfs]
+                raw_discount_returns = np.array(self.raw_discount_rewards[i]).sum(0)
+                raw_discount_names = ["discounted_" + str(rf) for rf in self.rfs] + ["discounted"]
+                info["microrts_stats"] = dict(zip(raw_names, raw_returns))
+                info["microrts_stats"].update(dict(zip(raw_discount_names, raw_discount_returns)))
+                self.raw_rewards[i] = []
+                self.raw_discount_rewards[i] = []
+                self.ts[i] = 0
+                newinfos[i] = info
+        return obs, rews, dones, newinfos
+
+
+# ALGO LOGIC: initialize agent here:
+class CategoricalMasked(Categorical):  # Categorical whose entries with a falsy mask get the logit `mask_value`
+    def __init__(self, probs=None, logits=None, validate_args=None, masks=None, mask_value=None):
+        logits = logits if masks is None else torch.where(masks.bool(), logits, mask_value)  # None: unmasked
+        super(CategoricalMasked, self).__init__(probs, logits, validate_args)
+
+
+class Transpose(nn.Module):
+    def __init__(self, permutation):
+        super().__init__()
+        self.permutation = permutation
+
+    def forward(self, x):
+        return x.permute(self.permutation)
+
+
+def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
+    torch.nn.init.orthogonal_(layer.weight, std)
+    torch.nn.init.constant_(layer.bias, bias_const)
+    return layer
+
+
+class Agent(nn.Module):
+    def __init__(self, envs, mapsize=16 * 16):
+        super(Agent, self).__init__()
+        self.mapsize = mapsize
+        h, w, c = envs.observation_space.shape
+        self.encoder = nn.Sequential(
+            Transpose((0, 3, 1, 2)),
+            layer_init(nn.Conv2d(c, 32, kernel_size=3, padding=1)),
+            nn.MaxPool2d(3, stride=2, padding=1),
+            nn.ReLU(),
+            layer_init(nn.Conv2d(32, 64, kernel_size=3, padding=1)),
+            nn.MaxPool2d(3, stride=2, padding=1),
+            nn.ReLU(),
+        )
+
+        self.actor = nn.Sequential(
+            layer_init(nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1)),
+            nn.ReLU(),
+            layer_init(nn.ConvTranspose2d(32, 78, 3, stride=2, padding=1, output_padding=1)),
+            Transpose((0, 2, 3, 1)),
+        )
+        self.critic = nn.Sequential(
+            nn.Flatten(),
+            layer_init(nn.Linear(64 * 4 * 4, 128)),
+            nn.ReLU(),
+            layer_init(nn.Linear(128, 1), std=1),
+        )
+        self.register_buffer("mask_value", torch.tensor(-1e8))
+
+    def get_action_and_value(self, x, action=None, invalid_action_masks=None, envs=None, device=None):
+        hidden = self.encoder(x)
+        logits = self.actor(hidden)
+        grid_logits = logits.reshape(-1, envs.action_plane_space.nvec.sum())
+        split_logits = torch.split(grid_logits, envs.action_plane_space.nvec.tolist(), dim=1)
+
+        if action is None:
+            invalid_action_masks = invalid_action_masks.view(-1, invalid_action_masks.shape[-1])
+            split_invalid_action_masks = torch.split(invalid_action_masks, envs.action_plane_space.nvec.tolist(), dim=1)
+            multi_categoricals = [
+                CategoricalMasked(logits=logits, masks=iam, mask_value=self.mask_value)
+                for (logits, iam) in zip(split_logits, split_invalid_action_masks)
+            ]
+            action = torch.stack([categorical.sample() for categorical in multi_categoricals])
+        else:
+            invalid_action_masks = invalid_action_masks.view(-1, invalid_action_masks.shape[-1])
+            action = action.view(-1, action.shape[-1]).T
+            split_invalid_action_masks = torch.split(invalid_action_masks, envs.action_plane_space.nvec.tolist(), dim=1)
+            multi_categoricals = [
+                CategoricalMasked(logits=logits, masks=iam, mask_value=self.mask_value)
+                for (logits, iam) in zip(split_logits, split_invalid_action_masks)
+            ]
+        logprob = torch.stack([categorical.log_prob(a) for a, categorical in zip(action, multi_categoricals)])
+        entropy = torch.stack([categorical.entropy() for categorical in multi_categoricals])
+        num_predicted_parameters = len(envs.action_plane_space.nvec)
+        logprob = logprob.T.view(-1, self.mapsize, num_predicted_parameters)
+        entropy = entropy.T.view(-1, self.mapsize, num_predicted_parameters)
+        action = action.T.view(-1, self.mapsize, num_predicted_parameters)
+        return action, logprob.sum(1).sum(1), entropy.sum(1).sum(1), invalid_action_masks, self.critic(hidden)
+
+    def get_value(self, x):
+        return self.critic(self.encoder(x))
+
+
+def run_evaluation(model_path: str, output_path: str, eval_maps: List[str]):
+    args = [
+        "python",
+        "league.py",
+        "--evals",
+        model_path,
+        "--update-db",
+        "false",
+        "--cuda",
+        "false",
+        "--output-path",
+        output_path,
+        "--model-type",
+        "ppo_gridnet",
+        "--maps",
+        *eval_maps,
+    ]
+    fd = subprocess.Popen(args)
+    print(f"Evaluating {model_path}")
+    return_code = fd.wait()
+    assert return_code == 0
+    return (model_path, output_path)
+
+
+class TrueskillWriter:
+    """Logs league-evaluation results via ``writer.add_scalar`` and, when ``prod_mode`` is set, as wandb tables."""
+    def __init__(self, prod_mode, writer, league_path: str, league_step_path: str):
+        self.prod_mode = prod_mode
+        self.writer = writer
+        self.trueskill_df = pd.read_csv(league_path)
+        self.trueskill_step_df = pd.read_csv(league_step_path)
+        self.trueskill_step_df["type"] = self.trueskill_step_df["name"]
+        self.trueskill_step_df["step"] = 0
+        # xxx(okachaiev): not sure we need this copy
+        self.preset_trueskill_step_df = self.trueskill_step_df.copy()
+
+    def on_evaluation_done(self, future):
+        """Future done-callback: log the evaluated checkpoint's trueskill; returns early if the future was cancelled."""
+        if future.cancelled():
+            return
+        model_path, output_path = future.result()
+        league = pd.read_csv(output_path, index_col="name")
+        assert model_path in league.index
+        model_global_step = int(model_path.split("/")[-1][:-3])  # strips the last 3 chars (".pt") of the file name
+        row = league.loc[model_path]
+        self.writer.add_scalar("charts/trueskill", row["trueskill"], model_global_step)
+        print(f"global_step={model_global_step}, trueskill={row['trueskill']}")
+
+        # table visualization logic
+        if self.prod_mode:
+            trueskill_data = {"name": row.name, "mu": row["mu"], "sigma": row["sigma"], "trueskill": row["trueskill"]}
+            # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the replacement.
+            self.trueskill_df = pd.concat([self.trueskill_df, pd.DataFrame([trueskill_data])], ignore_index=True)
+            wandb.log({"trueskill": wandb.Table(dataframe=self.trueskill_df)})
+            trueskill_data["type"] = "training"
+            trueskill_data["step"] = model_global_step
+            self.trueskill_step_df = pd.concat([self.trueskill_step_df, pd.DataFrame([trueskill_data])], ignore_index=True)
+            # append a copy of the preset rows re-stamped with this checkpoint's step
+            preset_trueskill_step_df_clone = self.preset_trueskill_step_df.copy()
+            preset_trueskill_step_df_clone["step"] = model_global_step
+            self.trueskill_step_df = pd.concat([self.trueskill_step_df, preset_trueskill_step_df_clone], ignore_index=True)
+            wandb.log({"trueskill_step": wandb.Table(dataframe=self.trueskill_step_df)})
+
+
+if __name__ == "__main__":
+    # torchrun --standalone --nnodes=1 --nproc_per_node=2 ppo_gridnet_multigpu.py
+    # taken from https://pytorch.org/docs/stable/elastic/run.html
+    local_rank = int(os.getenv("LOCAL_RANK", "0"))
+    world_size = int(os.getenv("WORLD_SIZE", "1"))
+    args = parse_args()
+
+    args.num_selfplay_envs = args.num_selfplay_envs // world_size
+    args.num_bot_envs = args.num_bot_envs // world_size
+
+    args.num_envs = args.num_selfplay_envs + args.num_bot_envs
+    args.batch_size = int(args.num_envs * args.num_steps)
+    args.minibatch_size = int(args.batch_size // args.n_minibatch)
+    args.num_updates = args.total_timesteps // (args.batch_size * world_size)
+    if world_size > 1:
+        dist.init_process_group(args.backend, rank=local_rank, world_size=world_size)
+    else:
+        warnings.warn(
+            """
+Not using distributed mode!
+If you want to use distributed mode, please execute this script with 'torchrun'.
+E.g., ` torchrun --standalone --nnodes=1 --nproc_per_node=2 ppo_gridnet_multigpu.py`
+        """
+        )
+    print(f"================================")
+    print(f"args.num_envs: {args.num_envs}, args.batch_size: {args.batch_size}, args.minibatch_size: {args.minibatch_size}")
+    run_name = f"{args.gym_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
+    writer = None
+    if local_rank == 0:
+        if args.prod_mode:
+            import wandb
+
+            run = wandb.init(
+                project=args.wandb_project_name,
+                entity=args.wandb_entity,
+                sync_tensorboard=True,
+                config=vars(args),
+                name=run_name,
+                monitor_gym=True,
+                save_code=True,
+            )
+        writer = SummaryWriter(f"runs/{run_name}")
+        writer.add_text(
+            "hyperparameters",
+            "|param|value|\n|-|-|\n%s" % ("\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])),
+        )
+
+    print(f"Save frequency: {args.save_frequency}")
+    # TRY NOT TO MODIFY: seeding
+    device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
+    print(f"Device: {device}")
+    # CRUCIAL: note that we needed to pass a different seed for each data parallelism worker
+    args.seed += local_rank
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed - local_rank)
+    torch.backends.cudnn.deterministic = args.torch_deterministic
+
+    if len(args.device_ids) > 0:
+        assert len(args.device_ids) == world_size, "you must specify the same number of device ids as `--nproc_per_node`"
+        device = torch.device(f"cuda:{args.device_ids[local_rank]}" if torch.cuda.is_available() and args.cuda else "cpu")
+    else:
+        device_count = torch.cuda.device_count()
+        if device_count < world_size:
+            device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
+        else:
+            device = torch.device(f"cuda:{local_rank}" if torch.cuda.is_available() and args.cuda else "cpu")
+
+    envs = MicroRTSGridModeVecEnv(
+        num_selfplay_envs=args.num_selfplay_envs,
+        num_bot_envs=args.num_bot_envs,
+        partial_obs=args.partial_obs,
+        max_steps=2000,
+        render_theme=2,
+        ai2s=[microrts_ai.coacAI for _ in range(args.num_bot_envs - 6)]
+        + [microrts_ai.randomBiasedAI for _ in range(min(args.num_bot_envs, 2))]
+        + [microrts_ai.lightRushAI for _ in range(min(args.num_bot_envs, 2))]
+        + [microrts_ai.workerRushAI for _ in range(min(args.num_bot_envs, 2))],
+        map_paths=[args.train_maps[0]],
+        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
+        cycle_maps=args.train_maps,
+    )
+    envs = MicroRTSStatsRecorder(envs, args.gamma)
+    envs = VecMonitor(envs)
+    if args.capture_video:
+        envs = VecVideoRecorder(
+            envs, f"videos/{run_name}", record_video_trigger=lambda x: x % 100000 == 0, video_length=2000
+        )
+    assert isinstance(envs.action_space, MultiDiscrete), "only MultiDiscrete action space is supported"
+
+    eval_executor = None
+    if args.max_eval_workers > 0 and local_rank == 0:
+        from concurrent.futures import ThreadPoolExecutor
+
+        eval_executor = ThreadPoolExecutor(max_workers=args.max_eval_workers, thread_name_prefix="league-eval-")
+
+    agent = Agent(envs).to(device)
+    torch.manual_seed(args.seed)
+    optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5)
+    if args.anneal_lr:
+        # https://github.com/openai/baselines/blob/ea25b9e8b234e6ee1bca43083f8f3cf974143998/baselines/ppo2/defaults.py#L20
+        lr = lambda f: f * args.learning_rate
+
+    # ALGO Logic: Storage for epoch data
+    mapsize = 16 * 16
+    action_space_shape = (mapsize, len(envs.action_plane_space.nvec))
+    invalid_action_shape = (mapsize, envs.action_plane_space.nvec.sum())
+
+    obs = torch.zeros((args.num_steps, args.num_envs) + envs.observation_space.shape).to(device)
+    actions = torch.zeros((args.num_steps, args.num_envs) + action_space_shape).to(device)
+    logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device)
+    rewards = torch.zeros((args.num_steps, args.num_envs)).to(device)
+    dones = torch.zeros((args.num_steps, args.num_envs)).to(device)
+    values = torch.zeros((args.num_steps, args.num_envs)).to(device)
+    invalid_action_masks = torch.zeros((args.num_steps, args.num_envs) + invalid_action_shape).to(device)
+    # TRY NOT TO MODIFY: start the game
+    global_step = 0
+    start_time = time.time()
+    # Note how `next_obs` and `next_done` are used; their usage is equivalent to
+    # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/84a7582477fb0d5c82ad6d850fe476829dddd2e1/a2c_ppo_acktr/storage.py#L60
+    next_obs = torch.Tensor(envs.reset()).to(device)
+    next_done = torch.zeros(args.num_envs).to(device)
+
+    print("Model's state_dict:")
+    for param_tensor in agent.state_dict():
+        print(param_tensor, "\t", agent.state_dict()[param_tensor].size())
+    total_params = sum([param.nelement() for param in agent.parameters()])
+    print("Model's total parameters:", total_params)
+
+    # EVALUATION LOGIC:
+    trueskill_writer = TrueskillWriter(
+        args.prod_mode, writer, "gym-microrts-static-files/league.csv", "gym-microrts-static-files/league.csv"
+    )
+
+    for update in range(1, args.num_updates + 1):
+        step_time = 0
+        inference_time = 0
+        get_mask_time = 0
+        # Annealing the rate if instructed to do so.
+        if args.anneal_lr:
+            frac = 1.0 - (update - 1.0) / args.num_updates
+            lrnow = lr(frac)
+            optimizer.param_groups[0]["lr"] = lrnow
+
+        # TRY NOT TO MODIFY: prepare the execution of the game.
+        rollout_time_start = time.time()
+        for step in range(0, args.num_steps):
+            # envs.render()
+            global_step += 1 * args.num_envs * world_size
+            obs[step] = next_obs
+            dones[step] = next_done
+
+            get_mask_time_start = time.time()
+            invalid_action_masks[step] = torch.tensor(envs.get_action_mask()).to(device)
+            get_mask_time += time.time() - get_mask_time_start
+
+            # ALGO LOGIC: put action logic here
+            inference_time_start = time.time()
+            with torch.no_grad():
+                action, logproba, _, _, vs = agent.get_action_and_value(
+                    next_obs, envs=envs, invalid_action_masks=invalid_action_masks[step], device=device
+                )
+                values[step] = vs.flatten()
+            actions[step] = action
+            logprobs[step] = logproba
+            cpu_action = action.cpu().numpy().reshape(envs.num_envs, -1)
+            inference_time += time.time() - inference_time_start
+
+            step_time_start = time.time()
+            next_obs, rs, ds, infos = envs.step(cpu_action)
+            step_time += time.time() - step_time_start
+
+            next_obs = torch.Tensor(next_obs).to(device)
+            rewards[step], next_done = torch.Tensor(rs).to(device), torch.Tensor(ds).to(device)
+
+            for info in infos:
+                if "episode" in info.keys() and local_rank == 0:
+                    print(f"global_step={global_step}, episodic_return={info['episode']['r']}")
+                    writer.add_scalar("charts/episodic_return", info["episode"]["r"], global_step)
+                    writer.add_scalar("charts/episodic_length", info["episode"]["l"], global_step)
+                    for key in info["microrts_stats"]:
+                        writer.add_scalar(f"charts/episodic_return/{key}", info["microrts_stats"][key], global_step)
+                    break
+        print(
+            f"local_rank: {local_rank}, action.sum(): {action.sum()}, update: {update}, agent.actor.weight.sum(): {list(agent.actor)[0].weight.sum()}"
+        )
+        training_time_start =  time.time()
+        # bootstrap reward if not done. reached the batch limit
+        with torch.no_grad():
+            last_value = agent.get_value(next_obs).reshape(1, -1)
+            if args.gae:
+                advantages = torch.zeros_like(rewards).to(device)
+                lastgaelam = 0
+                for t in reversed(range(args.num_steps)):
+                    if t == args.num_steps - 1:
+                        nextnonterminal = 1.0 - next_done
+                        nextvalues = last_value
+                    else:
+                        nextnonterminal = 1.0 - dones[t + 1]
+                        nextvalues = values[t + 1]
+                    delta = rewards[t] + args.gamma * nextvalues * nextnonterminal - values[t]
+                    advantages[t] = lastgaelam = delta + args.gamma * args.gae_lambda * nextnonterminal * lastgaelam
+                returns = advantages + values
+            else:
+                returns = torch.zeros_like(rewards).to(device)
+                for t in reversed(range(args.num_steps)):
+                    if t == args.num_steps - 1:
+                        nextnonterminal = 1.0 - next_done
+                        next_return = last_value
+                    else:
+                        nextnonterminal = 1.0 - dones[t + 1]
+                        next_return = returns[t + 1]
+                    returns[t] = rewards[t] + args.gamma * nextnonterminal * next_return
+                advantages = returns - values
+
+        # flatten the batch
+        b_obs = obs.reshape((-1,) + envs.observation_space.shape)
+        b_logprobs = logprobs.reshape(-1)
+        b_actions = actions.reshape((-1,) + action_space_shape)
+        b_advantages = advantages.reshape(-1)
+        b_returns = returns.reshape(-1)
+        b_values = values.reshape(-1)
+        b_invalid_action_masks = invalid_action_masks.reshape((-1,) + invalid_action_shape)
+
+        # Optimizing the policy and value network
+        inds = np.arange(
+            args.batch_size,
+        )
+        for i_epoch_pi in range(args.update_epochs):
+            np.random.shuffle(inds)
+            for start in range(0, args.batch_size, args.minibatch_size):
+                end = start + args.minibatch_size
+                minibatch_ind = inds[start:end]
+                mb_advantages = b_advantages[minibatch_ind]
+                if args.norm_adv:
+                    mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8)
+                _, newlogproba, entropy, _, new_values = agent.get_action_and_value(
+                    b_obs[minibatch_ind], b_actions.long()[minibatch_ind], b_invalid_action_masks[minibatch_ind], envs, device
+                )
+                ratio = (newlogproba - b_logprobs[minibatch_ind]).exp()
+
+                # Stats
+                approx_kl = (b_logprobs[minibatch_ind] - newlogproba).mean()
+
+                # Policy loss
+                pg_loss1 = -mb_advantages * ratio
+                pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - args.clip_coef, 1 + args.clip_coef)
+                pg_loss = torch.max(pg_loss1, pg_loss2).mean()
+                entropy_loss = entropy.mean()
+
+                # Value loss -- both branches must reduce to a scalar, because loss.backward() below needs a scalar loss
+                new_values = new_values.view(-1)
+                if args.clip_vloss:
+                    v_loss_unclipped = (new_values - b_returns[minibatch_ind]) ** 2
+                    v_clipped = b_values[minibatch_ind] + torch.clamp(
+                        new_values - b_values[minibatch_ind], -args.clip_coef, args.clip_coef
+                    )
+                    v_loss_clipped = (v_clipped - b_returns[minibatch_ind]) ** 2
+                    v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped)
+                    v_loss = 0.5 * v_loss_max.mean()
+                else:
+                    v_loss = 0.5 * ((new_values - b_returns[minibatch_ind]) ** 2).mean()
+
+                loss = pg_loss - args.ent_coef * entropy_loss + v_loss * args.vf_coef
+
+                optimizer.zero_grad()
+                loss.backward()
+
+                if world_size > 1:
+                    # batch allreduce ops: see https://github.com/entity-neural-network/incubator/pull/220
+                    all_grads_list = []
+                    for param in agent.parameters():
+                        if param.grad is not None:
+                            all_grads_list.append(param.grad.view(-1))
+                    all_grads = torch.cat(all_grads_list)
+                    dist.all_reduce(all_grads, op=dist.ReduceOp.SUM)
+                    offset = 0
+                    for param in agent.parameters():
+                        if param.grad is not None:
+                            param.grad.data.copy_(
+                                all_grads[offset : offset + param.numel()].view_as(param.grad.data) / world_size
+                            )
+                            offset += param.numel()
+
+                nn.utils.clip_grad_norm_(agent.parameters(), args.max_grad_norm)
+                optimizer.step()
+
+        if (update - 1) % args.save_frequency == 0 and local_rank == 0:
+            if not os.path.exists(f"models/{run_name}"):
+                os.makedirs(f"models/{run_name}")
+            torch.save(agent.state_dict(), f"models/{run_name}/agent.pt")
+            torch.save(agent.state_dict(), f"models/{run_name}/{global_step}.pt")
+            if args.prod_mode:
+                wandb.save(f"models/{run_name}/agent.pt", base_path=f"models/{run_name}", policy="now")
+            if eval_executor is not None:
+                future = eval_executor.submit(
+                    run_evaluation,
+                    f"models/{run_name}/{global_step}.pt",
+                    f"runs/{run_name}/{global_step}.csv",
+                    args.eval_maps,
+                )
+                print(f"Queued models/{run_name}/{global_step}.pt")
+                future.add_done_callback(trueskill_writer.on_evaluation_done)
+
+        # TRY NOT TO MODIFY: record rewards for plotting purposes
+        if local_rank == 0:
+            writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]["lr"], global_step)
+            writer.add_scalar("charts/update", update, global_step)
+            writer.add_scalar("losses/value_loss", v_loss.detach().item(), global_step)
+            writer.add_scalar("losses/policy_loss", pg_loss.detach().item(), global_step)
+            writer.add_scalar("losses/entropy", entropy.detach().mean().item(), global_step)
+            writer.add_scalar("losses/approx_kl", approx_kl.detach().item(), global_step)
+            if args.kle_stop or args.kle_rollback:
+                writer.add_scalar("debug/pg_stop_iter", i_epoch_pi, global_step)
+            writer.add_scalar("charts/sps", int(global_step / (time.time() - start_time)), global_step)
+            writer.add_scalar("charts/sps_step", int(args.num_envs * args.num_steps / step_time), global_step)
+            writer.add_scalar("charts/sps_inference", int(args.num_envs * args.num_steps / inference_time), global_step)
+            writer.add_scalar("charts/step_time", step_time, global_step)
+            writer.add_scalar("charts/inference_time", inference_time, global_step)
+            writer.add_scalar("charts/get_mask_time", get_mask_time, global_step)
+            writer.add_scalar("charts/rollout_time", time.time() - rollout_time_start, global_step)
+            writer.add_scalar("charts/training_time", time.time() - training_time_start, global_step)
+            print("SPS:", int(global_step / (time.time() - start_time)))
+
+    if eval_executor is not None:
+        # wait for queued evaluations to finish; `cancel_futures` (False by default) is only accepted on Python >= 3.9
+        eval_executor.shutdown(wait=True)
+    envs.close()
+    if local_rank == 0:
+        writer.close()
+        if args.prod_mode:
+            wandb.finish()
diff --git a/gym_microrts/envs/vec_env.py b/gym_microrts/envs/vec_env.py
index b90dcde9..075b8e4c 100644
--- a/gym_microrts/envs/vec_env.py
+++ b/gym_microrts/envs/vec_env.py
@@ -295,6 +295,7 @@ def __init__(
         render_theme=2,
         map_paths="maps/10x10/basesTwoWorkers10x10.xml",
         reward_weight=np.array([0.0, 1.0, 0.0, 0.0, 0.0, 5.0]),
+        autobuild_microrts=True,
     ):
 
         self.ai1s = ai1s
@@ -312,13 +313,16 @@ def __init__(
         if not os.path.exists(f"{self.microrts_path}/README.md"):
             print(MICRORTS_CLONE_MESSAGE)
             os.system(f"git submodule update --init --recursive")
-        print(f"removing {self.microrts_path}/microrts.jar...")
-        if os.path.exists(f"{self.microrts_path}/microrts.jar"):
-            os.remove(f"{self.microrts_path}/microrts.jar")
-        print(f"building {self.microrts_path}/microrts.jar...")
 
-        # call the build script at the microrts folder
-        subprocess.run(["bash", "build.sh", "&>", "build.log"], cwd=f"{self.microrts_path}")
+        if autobuild_microrts:
+            print(f"removing {self.microrts_path}/microrts.jar...")
+            if os.path.exists(f"{self.microrts_path}/microrts.jar"):
+                os.remove(f"{self.microrts_path}/microrts.jar")
+            print(f"building {self.microrts_path}/microrts.jar...")
+
+            # call the build script at the microrts folder
+            subprocess.run(["bash", "build.sh", "&>", "build.log"], cwd=f"{self.microrts_path}")
+
         root = ET.parse(os.path.join(self.microrts_path, self.map_paths[0])).getroot()
         self.height, self.width = int(root.get("height")), int(root.get("width"))
 

From 12690465bf1ae732c884b494e9022aff424d2dc1 Mon Sep 17 00:00:00 2001
From: Costa Huang <costa.huang@outlook.com>
Date: Wed, 16 Nov 2022 17:45:44 -0500
Subject: [PATCH 05/10] update poetry file

---
 poetry.lock    | 427 +++++++++++++++++++++++++++++++++++--------------
 pyproject.toml |   4 +-
 2 files changed, 313 insertions(+), 118 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index e915ef97..be2b1c58 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -72,6 +72,7 @@ python-versions = "~=3.6"
 
 [package.dependencies]
 lazy-object-proxy = ">=1.4.0"
+setuptools = ">=20.0"
 typed-ast = {version = ">=1.4.0,<1.5", markers = "implementation_name == \"cpython\" and python_version < \"3.8\""}
 typing-extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""}
 wrapt = ">=1.11,<1.13"
@@ -93,10 +94,10 @@ optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
 
 [package.extras]
-dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit"]
-docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
-tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface"]
-tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins"]
+dev = ["coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "sphinx", "sphinx-notfound-page", "zope.interface"]
+docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"]
+tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "zope.interface"]
+tests-no-zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six"]
 
 [[package]]
 name = "autopep8"
@@ -294,7 +295,7 @@ optional = false
 python-versions = ">=3.5.0"
 
 [package.extras]
-unicode_backport = ["unicodedata2"]
+unicode-backport = ["unicodedata2"]
 
 [[package]]
 name = "cleanrl"
@@ -315,16 +316,16 @@ torch = ">=1.7.1,<2.0.0"
 wandb = ">=0.12.1,<0.13.0"
 
 [package.extras]
-atari = ["stable-baselines3 (>=1.1.0,<2.0.0)", "ale-py (>=0.7,<0.8)", "AutoROM[accept-rom-license] (>=0.4.2,<0.5.0)"]
-procgen = ["stable-baselines3 (>=1.1.0,<2.0.0)", "procgen (>=0.10.4,<0.11.0)"]
-pettingzoo = ["stable-baselines3 (>=1.1.0,<2.0.0)", "pettingzoo (>=1.11.2,<2.0.0)", "pygame (>=2.0.1,<3.0.0)", "pymunk (>=6.2.0,<7.0.0)"]
-pybullet = ["pybullet (>=3.1.8,<4.0.0)"]
+atari = ["AutoROM[accept-rom-license] (>=0.4.2,<0.5.0)", "ale-py (>=0.7,<0.8)", "stable-baselines3 (>=1.1.0,<2.0.0)"]
+cloud = ["awscli (>=1.20.57,<2.0.0)", "boto3 (>=1.18.57,<2.0.0)"]
+docs = ["mkdocs-material (>=7.3.4,<8.0.0)"]
+mujoco = ["free-mujoco-py (>=2.1.6,<3.0.0)"]
+pettingzoo = ["pettingzoo (>=1.11.2,<2.0.0)", "pygame (>=2.0.1,<3.0.0)", "pymunk (>=6.2.0,<7.0.0)", "stable-baselines3 (>=1.1.0,<2.0.0)"]
 plot = ["pandas (>=1.3.3,<2.0.0)", "seaborn (>=0.11.2,<0.12.0)"]
-cloud = ["boto3 (>=1.18.57,<2.0.0)", "awscli (>=1.20.57,<2.0.0)"]
-spyder = ["spyder (>=5.1.5,<6.0.0)"]
+procgen = ["procgen (>=0.10.4,<0.11.0)", "stable-baselines3 (>=1.1.0,<2.0.0)"]
+pybullet = ["pybullet (>=3.1.8,<4.0.0)"]
 pytest = ["pytest (>=6.2.5,<7.0.0)"]
-mujoco = ["free-mujoco-py (>=2.1.6,<3.0.0)"]
-docs = ["mkdocs-material (>=7.3.4,<8.0.0)"]
+spyder = ["spyder (>=5.1.5,<6.0.0)"]
 
 [[package]]
 name = "click"
@@ -363,8 +364,8 @@ optional = false
 python-versions = ">=3.6"
 
 [package.extras]
-docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"]
-testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-black (>=0.3.7)", "pytest-mypy"]
+docs = ["jaraco.packaging (>=8.2)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx"]
+testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"]
 
 [[package]]
 name = "cookiecutter"
@@ -396,12 +397,12 @@ python-versions = ">=3.6"
 cffi = ">=1.12"
 
 [package.extras]
-docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"]
-docstest = ["doc8", "pyenchant (>=1.6.11)", "twine (>=1.12.0)", "sphinxcontrib-spelling (>=4.0.1)"]
+docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx_rtd_theme"]
+docstest = ["doc8", "pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"]
 pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"]
 sdist = ["setuptools_rust (>=0.11.4)"]
 ssh = ["bcrypt (>=3.1.5)"]
-test = ["pytest (>=6.2.0)", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"]
+test = ["hypothesis (>=1.11.4,!=3.79.2)", "iso8601", "pretend", "pytest (>=6.2.0)", "pytest-cov", "pytest-subtests", "pytest-xdist", "pytz"]
 
 [[package]]
 name = "cycler"
@@ -551,10 +552,11 @@ python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*"
 cachetools = ">=2.0.0,<5.0"
 pyasn1-modules = ">=0.2.1"
 rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""}
+setuptools = ">=40.3.0"
 six = ">=1.9.0"
 
 [package.extras]
-aiohttp = ["requests (>=2.20.0,<3.0.0dev)", "aiohttp (>=3.6.2,<4.0.0dev)"]
+aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "requests (>=2.20.0,<3.0.0dev)"]
 pyopenssl = ["pyopenssl (>=20.0.0)"]
 reauth = ["pyu2f (>=0.1.5)"]
 
@@ -602,15 +604,15 @@ numpy = ">=1.18.0"
 
 [package.extras]
 accept-rom-license = ["autorom[accept-rom-license] (>=0.4.2,<0.5.0)"]
-all = ["mujoco_py (>=1.50,<2.0)", "lz4 (>=3.1.0)", "opencv-python (>=3)", "ale-py (>=0.7.1,<0.8.0)", "pyglet (>=1.4.0)", "scipy (>=1.4.1)", "box2d-py (==2.3.5)", "pyglet (>=1.4.0)", "ale-py (>=0.7.1,<0.8.0)", "lz4 (>=3.1.0)", "opencv-python (>=3)", "pyglet (>=1.4.0)", "box2d-py (==2.3.5)", "pyglet (>=1.4.0)", "scipy (>=1.4.1)", "mujoco_py (>=1.50,<2.0)"]
+all = ["ale-py (>=0.7.1,<0.8.0)", "ale-py (>=0.7.1,<0.8.0)", "box2d-py (==2.3.5)", "box2d-py (==2.3.5)", "lz4 (>=3.1.0)", "lz4 (>=3.1.0)", "mujoco_py (>=1.50,<2.0)", "mujoco_py (>=1.50,<2.0)", "pyglet (>=1.4.0)", "pyglet (>=1.4.0)", "pyglet (>=1.4.0)", "pyglet (>=1.4.0)", "scipy (>=1.4.1)", "scipy (>=1.4.1)"]
 atari = ["ale-py (>=0.7.1,<0.8.0)"]
 box2d = ["box2d-py (==2.3.5)", "pyglet (>=1.4.0)"]
-classic_control = ["pyglet (>=1.4.0)"]
+classic-control = ["pyglet (>=1.4.0)"]
 mujoco = ["mujoco_py (>=1.50,<2.0)"]
-nomujoco = ["lz4 (>=3.1.0)", "opencv-python (>=3)", "ale-py (>=0.7.1,<0.8.0)", "pyglet (>=1.4.0)", "scipy (>=1.4.1)", "box2d-py (==2.3.5)", "pyglet (>=1.4.0)"]
-other = ["lz4 (>=3.1.0)", "opencv-python (>=3)"]
+nomujoco = ["ale-py (>=0.7.1,<0.8.0)", "box2d-py (==2.3.5)", "lz4 (>=3.1.0)", "pyglet (>=1.4.0)", "pyglet (>=1.4.0)", "scipy (>=1.4.1)"]
+other = ["lz4 (>=3.1.0)"]
 robotics = ["mujoco_py (>=1.50,<2.0)"]
-toy_text = ["scipy (>=1.4.1)"]
+toy-text = ["scipy (>=1.4.1)"]
 
 [[package]]
 name = "identify"
@@ -652,9 +654,9 @@ typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""}
 zipp = ">=0.5"
 
 [package.extras]
-docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
+docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"]
 perf = ["ipython"]
-testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pep517", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"]
+testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pep517", "pyfakefs", "pytest (>=4.6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy", "pytest-perf (>=0.9.2)"]
 
 [[package]]
 name = "importlib-resources"
@@ -668,8 +670,8 @@ python-versions = ">=3.6"
 zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
 
 [package.extras]
-docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
-testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-black (>=0.3.7)", "pytest-mypy"]
+docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"]
+testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"]
 
 [[package]]
 name = "inflection"
@@ -718,7 +720,7 @@ tornado = ">=4.2,<7.0"
 traitlets = ">=5.1.0,<6.0"
 
 [package.extras]
-test = ["pytest (!=5.3.4)", "pytest-cov", "flaky", "nose", "ipyparallel"]
+test = ["flaky", "ipyparallel", "nose", "pytest (!=5.3.4)", "pytest-cov"]
 
 [[package]]
 name = "ipython"
@@ -739,6 +741,7 @@ pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""}
 pickleshare = "*"
 prompt-toolkit = ">=2.0.0,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.1.0"
 pygments = "*"
+setuptools = ">=18.5"
 traitlets = ">=4.2"
 
 [package.extras]
@@ -747,10 +750,10 @@ doc = ["Sphinx (>=1.3)"]
 kernel = ["ipykernel"]
 nbconvert = ["nbconvert"]
 nbformat = ["nbformat"]
-notebook = ["notebook", "ipywidgets"]
+notebook = ["ipywidgets", "notebook"]
 parallel = ["ipyparallel"]
 qtconsole = ["qtconsole"]
-test = ["nose (>=0.10.1)", "requests", "testpath", "pygments", "nbformat", "ipykernel", "numpy (>=1.17)"]
+test = ["ipykernel", "nbformat", "nose (>=0.10.1)", "numpy (>=1.17)", "pygments", "requests", "testpath"]
 
 [[package]]
 name = "ipython-genutils"
@@ -769,10 +772,10 @@ optional = true
 python-versions = ">=3.6.1,<4.0"
 
 [package.extras]
-pipfile_deprecated_finder = ["pipreqs", "requirementslib"]
-requirements_deprecated_finder = ["pipreqs", "pip-api"]
 colors = ["colorama (>=0.4.3,<0.5.0)"]
+pipfile-deprecated-finder = ["pipreqs", "requirementslib"]
 plugins = ["setuptools"]
+requirements-deprecated-finder = ["pip-api", "pipreqs"]
 
 [[package]]
 name = "jedi"
@@ -798,8 +801,8 @@ optional = true
 python-versions = ">=3.6"
 
 [package.extras]
-test = ["pytest", "pytest-trio", "pytest-asyncio", "testpath", "trio", "async-timeout"]
-trio = ["trio", "async-generator"]
+test = ["async-timeout", "pytest", "pytest-asyncio", "pytest-trio", "testpath", "trio"]
+trio = ["async_generator", "trio"]
 
 [[package]]
 name = "jinja2"
@@ -866,7 +869,7 @@ pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2"
 
 [package.extras]
 format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
-format_nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]
+format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]
 
 [[package]]
 name = "jupyter-client"
@@ -886,7 +889,7 @@ traitlets = "*"
 
 [package.extras]
 doc = ["sphinx (>=1.3.6)", "sphinx-rtd-theme", "sphinxcontrib-github-alt"]
-test = ["async-generator", "ipykernel", "ipython", "mock", "pytest-asyncio", "pytest-timeout", "pytest", "mypy", "pre-commit", "jedi (<0.18)"]
+test = ["async-generator", "ipykernel", "ipython", "jedi (<0.18)", "mock", "mypy", "pre-commit", "pytest", "pytest-asyncio", "pytest-timeout"]
 
 [[package]]
 name = "jupyter-core"
@@ -926,8 +929,8 @@ pywin32-ctypes = {version = "<0.1.0 || >0.1.0,<0.1.1 || >0.1.1", markers = "sys_
 SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""}
 
 [package.extras]
-docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
-testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-black (>=0.3.7)", "pytest-mypy"]
+docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"]
+testing = ["pytest (>=4.6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"]
 
 [[package]]
 name = "kiwisolver"
@@ -1033,9 +1036,9 @@ nest-asyncio = "*"
 traitlets = ">=4.2"
 
 [package.extras]
-dev = ["codecov", "coverage", "ipython", "ipykernel", "ipywidgets", "pytest (>=4.1)", "pytest-cov (>=2.6.1)", "check-manifest", "flake8", "mypy", "tox", "bumpversion", "xmltodict", "pip (>=18.1)", "wheel (>=0.31.0)", "setuptools (>=38.6.0)", "twine (>=1.11.0)", "black"]
-sphinx = ["Sphinx (>=1.7)", "sphinx-book-theme", "mock", "moto", "myst-parser"]
-test = ["codecov", "coverage", "ipython", "ipykernel", "ipywidgets", "pytest (>=4.1)", "pytest-cov (>=2.6.1)", "check-manifest", "flake8", "mypy", "tox", "bumpversion", "xmltodict", "pip (>=18.1)", "wheel (>=0.31.0)", "setuptools (>=38.6.0)", "twine (>=1.11.0)", "black"]
+dev = ["black", "bumpversion", "check-manifest", "codecov", "coverage", "flake8", "ipykernel", "ipython", "ipywidgets", "mypy", "pip (>=18.1)", "pytest (>=4.1)", "pytest-cov (>=2.6.1)", "setuptools (>=38.6.0)", "tox", "twine (>=1.11.0)", "wheel (>=0.31.0)", "xmltodict"]
+sphinx = ["Sphinx (>=1.7)", "mock", "moto", "myst-parser", "sphinx-book-theme"]
+test = ["black", "bumpversion", "check-manifest", "codecov", "coverage", "flake8", "ipykernel", "ipython", "ipywidgets", "mypy", "pip (>=18.1)", "pytest (>=4.1)", "pytest-cov (>=2.6.1)", "setuptools (>=38.6.0)", "tox", "twine (>=1.11.0)", "wheel (>=0.31.0)", "xmltodict"]
 
 [[package]]
 name = "nbconvert"
@@ -1061,10 +1064,10 @@ testpath = "*"
 traitlets = ">=5.0"
 
 [package.extras]
-all = ["pytest", "pytest-cov", "pytest-dependency", "ipykernel", "ipywidgets (>=7)", "pyppeteer (==0.2.6)", "tornado (>=4.0)", "sphinx (>=1.5.1)", "sphinx-rtd-theme", "nbsphinx (>=0.2.12)", "ipython"]
-docs = ["sphinx (>=1.5.1)", "sphinx-rtd-theme", "nbsphinx (>=0.2.12)", "ipython"]
+all = ["ipykernel", "ipython", "ipywidgets (>=7)", "nbsphinx (>=0.2.12)", "pyppeteer (==0.2.6)", "pytest", "pytest-cov", "pytest-dependency", "sphinx (>=1.5.1)", "sphinx-rtd-theme", "tornado (>=4.0)"]
+docs = ["ipython", "nbsphinx (>=0.2.12)", "sphinx (>=1.5.1)", "sphinx-rtd-theme"]
 serve = ["tornado (>=4.0)"]
-test = ["pytest", "pytest-cov", "pytest-dependency", "ipykernel", "ipywidgets (>=7)", "pyppeteer (==0.2.6)"]
+test = ["ipykernel", "ipywidgets (>=7)", "pyppeteer (==0.2.6)", "pytest", "pytest-cov", "pytest-dependency"]
 webpdf = ["pyppeteer (==0.2.6)"]
 
 [[package]]
@@ -1083,7 +1086,7 @@ traitlets = ">=4.1"
 
 [package.extras]
 fast = ["fastjsonschema"]
-test = ["check-manifest", "fastjsonschema", "testpath", "pytest", "pytest-cov"]
+test = ["check-manifest", "fastjsonschema", "pytest", "pytest-cov", "testpath"]
 
 [[package]]
 name = "nest-asyncio"
@@ -1124,6 +1127,54 @@ sphinx = ">=1.6.5"
 [package.extras]
 testing = ["matplotlib", "pytest", "pytest-cov"]
 
+[[package]]
+name = "nvidia-cublas-cu11"
+version = "11.10.3.66"
+description = "CUBLAS native runtime libraries"
+category = "main"
+optional = false
+python-versions = ">=3"
+
+[package.dependencies]
+setuptools = "*"
+wheel = "*"
+
+[[package]]
+name = "nvidia-cuda-nvrtc-cu11"
+version = "11.7.99"
+description = "NVRTC native runtime libraries"
+category = "main"
+optional = false
+python-versions = ">=3"
+
+[package.dependencies]
+setuptools = "*"
+wheel = "*"
+
+[[package]]
+name = "nvidia-cuda-runtime-cu11"
+version = "11.7.99"
+description = "CUDA Runtime native Libraries"
+category = "main"
+optional = false
+python-versions = ">=3"
+
+[package.dependencies]
+setuptools = "*"
+wheel = "*"
+
+[[package]]
+name = "nvidia-cudnn-cu11"
+version = "8.5.0.96"
+description = "cuDNN runtime libraries"
+category = "main"
+optional = false
+python-versions = ">=3"
+
+[package.dependencies]
+setuptools = "*"
+wheel = "*"
+
 [[package]]
 name = "oauthlib"
 version = "3.1.1"
@@ -1201,9 +1252,9 @@ cryptography = ">=2.5"
 pynacl = ">=1.0.1"
 
 [package.extras]
-all = ["pyasn1 (>=0.1.7)", "pynacl (>=1.0.1)", "bcrypt (>=3.1.3)", "invoke (>=1.3)", "gssapi (>=1.4.1)", "pywin32 (>=2.1.8)"]
-ed25519 = ["pynacl (>=1.0.1)", "bcrypt (>=3.1.3)"]
-gssapi = ["pyasn1 (>=0.1.7)", "gssapi (>=1.4.1)", "pywin32 (>=2.1.8)"]
+all = ["bcrypt (>=3.1.3)", "gssapi (>=1.4.1)", "invoke (>=1.3)", "pyasn1 (>=0.1.7)", "pynacl (>=1.0.1)", "pywin32 (>=2.1.8)"]
+ed25519 = ["bcrypt (>=3.1.3)", "pynacl (>=1.0.1)"]
+gssapi = ["gssapi (>=1.4.1)", "pyasn1 (>=0.1.7)", "pywin32 (>=2.1.8)"]
 invoke = ["invoke (>=1.3)"]
 
 [[package]]
@@ -1255,14 +1306,14 @@ gym = ">=0.21.0"
 numpy = ">=1.18.0"
 
 [package.extras]
-all = ["multi_agent_ale_py (==0.1.11)", "pygame (==2.1.0)", "chess (==1.7.0)", "rlcard (==1.0.4)", "pygame (==2.1.0)", "hanabi_learning_environment (==0.0.1)", "pygame (==2.1.0)", "pymunk (==6.2.0)", "magent (==0.2.0)", "pyglet (>=1.4.0)", "pygame (==2.1.0)", "box2d-py (==2.3.5)", "scipy (>=1.4.1)", "pillow (>=8.0.1)"]
+all = ["box2d-py (==2.3.5)", "chess (==1.7.0)", "hanabi_learning_environment (==0.0.1)", "magent (==0.2.0)", "multi_agent_ale_py (==0.1.11)", "pillow (>=8.0.1)", "pygame (==2.1.0)", "pygame (==2.1.0)", "pygame (==2.1.0)", "pygame (==2.1.0)", "pyglet (>=1.4.0)", "pymunk (==6.2.0)", "rlcard (==1.0.4)", "scipy (>=1.4.1)"]
 atari = ["multi_agent_ale_py (==0.1.11)", "pygame (==2.1.0)"]
 butterfly = ["pygame (==2.1.0)", "pymunk (==6.2.0)"]
-classic = ["chess (==1.7.0)", "rlcard (==1.0.4)", "pygame (==2.1.0)", "hanabi_learning_environment (==0.0.1)"]
+classic = ["chess (==1.7.0)", "hanabi_learning_environment (==0.0.1)", "pygame (==2.1.0)", "rlcard (==1.0.4)"]
 magent = ["magent (==0.2.0)"]
 mpe = ["pyglet (>=1.4.0)"]
 other = ["pillow (>=8.0.1)"]
-sisl = ["pygame (==2.1.0)", "box2d-py (==2.3.5)", "scipy (>=1.4.1)"]
+sisl = ["box2d-py (==2.3.5)", "pygame (==2.1.0)", "scipy (>=1.4.1)"]
 tests = ["pynput"]
 
 [[package]]
@@ -1369,7 +1420,7 @@ python-versions = "*"
 six = "*"
 
 [package.extras]
-test = ["pytest (>=2.7.3)", "pytest-cov", "coveralls", "futures", "pytest-benchmark", "mock"]
+test = ["coveralls", "futures", "mock", "pytest (>=2.7.3)", "pytest-benchmark", "pytest-cov"]
 
 [[package]]
 name = "prompt-toolkit"
@@ -1399,7 +1450,7 @@ optional = false
 python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 
 [package.extras]
-test = ["ipaddress", "mock", "unittest2", "enum34", "pywin32", "wmi"]
+test = ["enum34", "ipaddress", "mock", "pywin32", "unittest2", "wmi"]
 
 [[package]]
 name = "ptyprocess"
@@ -1529,8 +1580,8 @@ cffi = ">=1.4.1"
 six = "*"
 
 [package.extras]
-docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"]
-tests = ["pytest (>=3.2.1,!=3.3.0)", "hypothesis (>=3.27.0)"]
+docs = ["sphinx (>=1.6.5)", "sphinx_rtd_theme"]
+tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"]
 
 [[package]]
 name = "pyobjc"
@@ -3393,7 +3444,7 @@ python-lsp-server = "*"
 toml = "*"
 
 [package.extras]
-dev = ["isort (>=5.0)", "flake8", "pytest", "mypy"]
+dev = ["flake8", "isort (>=5.0)", "mypy", "pytest"]
 
 [[package]]
 name = "python-lsp-jsonrpc"
@@ -3407,7 +3458,7 @@ python-versions = "*"
 ujson = ">=3.0.0"
 
 [package.extras]
-test = ["pylint", "pycodestyle", "pyflakes", "pytest", "pytest-cov", "coverage"]
+test = ["coverage", "pycodestyle", "pyflakes", "pylint", "pytest", "pytest-cov"]
 
 [[package]]
 name = "python-lsp-server"
@@ -3429,6 +3480,7 @@ pyflakes = {version = ">=2.3.0,<2.4.0", optional = true, markers = "extra == \"a
 pylint = {version = ">=2.5.0,<2.10.0", optional = true, markers = "extra == \"all\""}
 python-lsp-jsonrpc = ">=1.0.0"
 rope = {version = ">=0.10.5", optional = true, markers = "extra == \"all\""}
+setuptools = ">=39.0.0"
 ujson = ">=3.0.0"
 yapf = {version = "*", optional = true, markers = "extra == \"all\""}
 
@@ -3442,7 +3494,7 @@ pydocstyle = ["pydocstyle (>=2.0.0)"]
 pyflakes = ["pyflakes (>=2.3.0,<2.4.0)"]
 pylint = ["pylint (>=2.5.0,<2.10.0)"]
 rope = ["rope (>0.10.5)"]
-test = ["pylint (>=2.5.0,<2.10.0)", "pytest", "pytest-cov", "coverage", "numpy", "pandas", "matplotlib", "pyqt5", "flaky"]
+test = ["coverage", "flaky", "matplotlib", "numpy", "pandas", "pylint (>=2.5.0,<2.10.0)", "pyqt5", "pytest", "pytest-cov"]
 yapf = ["yapf"]
 
 [[package]]
@@ -3541,7 +3593,7 @@ tinycss2 = ">=0.5,<2"
 
 [package.extras]
 doc = ["sphinx (>=1.2.2,<2)", "sphinx-rtd-theme (>=0.1.6,<1)", "sphinxcontrib-autoprogram (>=0.1.2,!=0.1.3,<1)"]
-test = ["pytest (>=6,<7)", "pytest-mock (>=3,<4)", "pytest-catchlog (>=1,<2)"]
+test = ["pytest (>=6,<7)", "pytest-catchlog (>=1,<2)", "pytest-mock (>=3,<4)"]
 
 [[package]]
 name = "qtawesome"
@@ -3608,7 +3660,7 @@ urllib3 = ">=1.21.1,<1.27"
 
 [package.extras]
 socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
-use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
+use-chardet-on-py3 = ["chardet (>=3.0.2,<5)"]
 
 [[package]]
 name = "requests-oauthlib"
@@ -3701,15 +3753,28 @@ celery = ["celery (>=3)"]
 chalice = ["chalice (>=1.16.0)"]
 django = ["django (>=1.8)"]
 falcon = ["falcon (>=1.4)"]
-flask = ["flask (>=0.11)", "blinker (>=1.1)"]
+flask = ["blinker (>=1.1)", "flask (>=0.11)"]
 httpx = ["httpx (>=0.16.0)"]
-pure_eval = ["pure-eval", "executing", "asttokens"]
+pure-eval = ["asttokens", "executing", "pure-eval"]
 pyspark = ["pyspark (>=2.4.4)"]
 rq = ["rq (>=0.6)"]
 sanic = ["sanic (>=0.8)"]
 sqlalchemy = ["sqlalchemy (>=1.2)"]
 tornado = ["tornado (>=5)"]
 
+[[package]]
+name = "setuptools"
+version = "65.5.1"
+description = "Easily download, build, install, upgrade, and uninstall Python packages"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+
+[package.extras]
+docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
+testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
+testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
+
 [[package]]
 name = "shortuuid"
 version = "1.0.1"
@@ -3768,6 +3833,7 @@ Jinja2 = ">=2.3"
 packaging = "*"
 Pygments = ">=2.0"
 requests = ">=2.5.0"
+setuptools = "*"
 snowballstemmer = ">=1.1"
 sphinxcontrib-applehelp = "*"
 sphinxcontrib-devhelp = "*"
@@ -3778,8 +3844,8 @@ sphinxcontrib-serializinghtml = ">=1.1.5"
 
 [package.extras]
 docs = ["sphinxcontrib-websupport"]
-lint = ["flake8 (>=3.5.0)", "isort", "mypy (>=0.900)", "docutils-stubs", "types-typed-ast", "types-pkg-resources", "types-requests"]
-test = ["pytest", "pytest-cov", "html5lib", "cython", "typed-ast"]
+lint = ["docutils-stubs", "flake8 (>=3.5.0)", "isort", "mypy (>=0.900)", "types-pkg-resources", "types-requests", "types-typed-ast"]
+test = ["cython", "html5lib", "pytest", "pytest-cov", "typed-ast"]
 
 [[package]]
 name = "sphinxcontrib-applehelp"
@@ -3790,7 +3856,7 @@ optional = true
 python-versions = ">=3.5"
 
 [package.extras]
-lint = ["flake8", "mypy", "docutils-stubs"]
+lint = ["docutils-stubs", "flake8", "mypy"]
 test = ["pytest"]
 
 [[package]]
@@ -3802,7 +3868,7 @@ optional = true
 python-versions = ">=3.5"
 
 [package.extras]
-lint = ["flake8", "mypy", "docutils-stubs"]
+lint = ["docutils-stubs", "flake8", "mypy"]
 test = ["pytest"]
 
 [[package]]
@@ -3814,8 +3880,8 @@ optional = true
 python-versions = ">=3.6"
 
 [package.extras]
-lint = ["flake8", "mypy", "docutils-stubs"]
-test = ["pytest", "html5lib"]
+lint = ["docutils-stubs", "flake8", "mypy"]
+test = ["html5lib", "pytest"]
 
 [[package]]
 name = "sphinxcontrib-jsmath"
@@ -3826,7 +3892,7 @@ optional = true
 python-versions = ">=3.5"
 
 [package.extras]
-test = ["pytest", "flake8", "mypy"]
+test = ["flake8", "mypy", "pytest"]
 
 [[package]]
 name = "sphinxcontrib-qthelp"
@@ -3837,7 +3903,7 @@ optional = true
 python-versions = ">=3.5"
 
 [package.extras]
-lint = ["flake8", "mypy", "docutils-stubs"]
+lint = ["docutils-stubs", "flake8", "mypy"]
 test = ["pytest"]
 
 [[package]]
@@ -3849,7 +3915,7 @@ optional = true
 python-versions = ">=3.5"
 
 [package.extras]
-lint = ["flake8", "mypy", "docutils-stubs"]
+lint = ["docutils-stubs", "flake8", "mypy"]
 test = ["pytest"]
 
 [[package]]
@@ -3894,6 +3960,7 @@ qtawesome = ">=1.0.2"
 qtconsole = ">=5.1.0"
 qtpy = ">=1.5.0"
 rtree = ">=0.9.7"
+setuptools = ">=49.6.0"
 sphinx = ">=0.6.6"
 spyder-kernels = ">=2.1.1,<2.2.0"
 textdistance = ">=4.2.0"
@@ -3901,7 +3968,7 @@ three-merge = ">=0.1.1"
 watchdog = ">=0.10.3"
 
 [package.extras]
-test = ["coverage", "cython", "flaky", "matplotlib", "pandas", "pillow", "pytest (<6.0)", "pytest-cov", "pytest-lazy-fixture", "pytest-mock", "pytest-order", "pytest-qt", "pyyaml", "scipy", "sympy", "pytest-xvfb", "pywin32"]
+test = ["coverage", "cython", "flaky", "matplotlib", "pandas", "pillow", "pytest (<6.0)", "pytest-cov", "pytest-lazy-fixture", "pytest-mock", "pytest-order", "pytest-qt", "pytest-xvfb", "pywin32", "pyyaml", "scipy", "sympy"]
 
 [[package]]
 name = "spyder-kernels"
@@ -3920,7 +3987,7 @@ pyzmq = ">=17"
 wurlitzer = {version = ">=1.0.3", markers = "platform_system != \"Windows\""}
 
 [package.extras]
-test = ["codecov", "cython", "dask", "flaky", "matplotlib", "mock", "numpy", "pandas", "pytest", "pytest-cov", "scipy", "xarray", "pillow"]
+test = ["codecov", "cython", "dask[distributed]", "flaky", "matplotlib", "mock", "numpy", "pandas", "pillow", "pytest", "pytest-cov", "scipy", "xarray"]
 
 [[package]]
 name = "stable-baselines3"
@@ -3939,9 +4006,9 @@ pandas = "*"
 torch = ">=1.4.0"
 
 [package.extras]
-docs = ["sphinx", "sphinx-autobuild", "sphinx-rtd-theme", "sphinxcontrib.spelling", "sphinx-autodoc-typehints"]
-extra = ["opencv-python", "atari-py (>=0.2.0,<0.3.0)", "pillow", "tensorboard (>=2.2.0)", "psutil"]
-tests = ["pytest", "pytest-cov", "pytest-env", "pytest-xdist", "pytype", "flake8 (>=3.8)", "flake8-bugbear", "isort (>=5.0)", "black"]
+docs = ["sphinx", "sphinx-autobuild", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib.spelling"]
+extra = ["atari-py (>=0.2.0,<0.3.0)", "opencv-python", "pillow", "psutil", "tensorboard (>=2.2.0)"]
+tests = ["black", "flake8 (>=3.8)", "flake8-bugbear", "isort (>=5.0)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "pytype"]
 
 [[package]]
 name = "subprocess32"
@@ -3968,9 +4035,11 @@ markdown = ">=2.6.8"
 numpy = ">=1.12.0"
 protobuf = ">=3.6.0"
 requests = ">=2.21.0,<3"
+setuptools = ">=41.0.0"
 tensorboard-data-server = ">=0.6.0,<0.7.0"
 tensorboard-plugin-wit = ">=1.6.0"
 werkzeug = ">=0.11.15"
+wheel = ">=0.26"
 
 [[package]]
 name = "tensorboard-data-server"
@@ -4005,7 +4074,7 @@ optional = true
 python-versions = ">= 3.5"
 
 [package.extras]
-test = ["pytest", "pathlib2"]
+test = ["pathlib2", "pytest"]
 
 [[package]]
 name = "text-unidecode"
@@ -4024,17 +4093,17 @@ optional = true
 python-versions = ">=3.5"
 
 [package.extras]
-dameraulevenshtein = ["jellyfish", "pyxdameraulevenshtein"]
-hamming = ["python-levenshtein", "jellyfish", "distance", "abydos"]
-jaro = ["python-levenshtein"]
+all = ["abydos", "distance", "jellyfish", "numpy", "py-stringmatching", "pylev", "python-Levenshtein", "pyxDamerauLevenshtein", "tabulate"]
+benchmark = ["abydos", "distance", "jellyfish", "numpy", "py-stringmatching", "pylev", "python-Levenshtein", "pyxDamerauLevenshtein", "tabulate"]
+benchmarks = ["abydos", "distance", "jellyfish", "numpy", "py-stringmatching", "pylev", "python-Levenshtein", "pyxDamerauLevenshtein", "tabulate"]
+common = ["abydos", "jellyfish", "numpy", "python-Levenshtein", "pyxDamerauLevenshtein"]
+dameraulevenshtein = ["jellyfish", "pyxDamerauLevenshtein"]
+extra = ["abydos", "jellyfish", "numpy", "python-Levenshtein", "pyxDamerauLevenshtein"]
+extras = ["abydos", "jellyfish", "numpy", "python-Levenshtein", "pyxDamerauLevenshtein"]
+hamming = ["abydos", "distance", "jellyfish", "python-Levenshtein"]
+jaro = ["python-Levenshtein"]
 jarowinkler = ["jellyfish"]
-levenshtein = ["python-levenshtein"]
-all = ["abydos", "jellyfish", "numpy", "python-levenshtein", "pyxdameraulevenshtein", "distance", "pylev", "py-stringmatching", "tabulate"]
-benchmark = ["abydos", "jellyfish", "numpy", "python-levenshtein", "pyxdameraulevenshtein", "distance", "pylev", "py-stringmatching", "tabulate"]
-benchmarks = ["abydos", "jellyfish", "numpy", "python-levenshtein", "pyxdameraulevenshtein", "distance", "pylev", "py-stringmatching", "tabulate"]
-common = ["abydos", "jellyfish", "numpy", "python-levenshtein", "pyxdameraulevenshtein"]
-extra = ["abydos", "jellyfish", "numpy", "python-levenshtein", "pyxdameraulevenshtein"]
-extras = ["abydos", "jellyfish", "numpy", "python-levenshtein", "pyxdameraulevenshtein"]
+levenshtein = ["python-Levenshtein"]
 test = ["hypothesis", "isort", "numpy", "pytest"]
 
 [[package]]
@@ -4049,7 +4118,7 @@ python-versions = "*"
 diff-match-patch = "*"
 
 [package.extras]
-test = ["pytest", "pytest-cov", "flaky", "pytest-timeout"]
+test = ["flaky", "pytest", "pytest-cov", "pytest-timeout"]
 
 [[package]]
 name = "tinycss2"
@@ -4063,8 +4132,8 @@ python-versions = ">=3.6"
 webencodings = ">=0.4"
 
 [package.extras]
-doc = ["sphinx", "sphinx-rtd-theme"]
-test = ["pytest", "pytest-cov", "pytest-flake8", "pytest-isort", "coverage"]
+doc = ["sphinx", "sphinx_rtd_theme"]
+test = ["coverage[toml]", "pytest", "pytest-cov", "pytest-flake8", "pytest-isort"]
 
 [[package]]
 name = "toml"
@@ -4092,16 +4161,22 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
 
 [[package]]
 name = "torch"
-version = "1.7.1"
+version = "1.13.0"
 description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
 category = "main"
 optional = false
-python-versions = ">=3.6.2"
+python-versions = ">=3.7.0"
 
 [package.dependencies]
-numpy = "*"
+nvidia-cublas-cu11 = "11.10.3.66"
+nvidia-cuda-nvrtc-cu11 = "11.7.99"
+nvidia-cuda-runtime-cu11 = "11.7.99"
+nvidia-cudnn-cu11 = "8.5.0.96"
 typing-extensions = "*"
 
+[package.extras]
+opt-einsum = ["opt-einsum (>=3.3)"]
+
 [[package]]
 name = "tornado"
 version = "6.1"
@@ -4166,7 +4241,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
 
 [package.extras]
 brotli = ["brotlipy (>=0.6.0)"]
-secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
+secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)"]
 socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
 
 [[package]]
@@ -4186,7 +4261,7 @@ six = ">=1.9.0,<2"
 
 [package.extras]
 docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=21.3)"]
-testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)", "packaging (>=20.0)"]
+testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "packaging (>=20.0)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)"]
 
 [[package]]
 name = "wandb"
@@ -4217,11 +4292,11 @@ yaspin = ">=1.0.0"
 [package.extras]
 aws = ["boto3"]
 gcp = ["google-cloud-storage"]
-kubeflow = ["kubernetes", "minio", "google-cloud-storage", "sh"]
-launch = ["jupyter-repo2docker", "nbconvert", "chardet", "iso8601", "typing-extensions", "yaspin"]
-media = ["numpy", "moviepy", "pillow", "bokeh", "soundfile", "plotly"]
+kubeflow = ["google-cloud-storage", "kubernetes", "minio", "sh"]
+launch = ["chardet", "iso8601", "jupyter-repo2docker", "nbconvert", "typing-extensions", "yaspin"]
+media = ["bokeh", "moviepy", "numpy", "pillow", "plotly", "soundfile"]
 service = ["grpcio (>=1.27.2)", "setproctitle"]
-sweeps = ["numpy (>=1.15,<1.21)", "scipy (>=1.5.4)", "pyyaml", "scikit-learn (==0.24.1)", "jsonschema (>=3.2.0)", "jsonref (>=0.2)", "pydantic (>=1.8.2)"]
+sweeps = ["PyYAML", "jsonref (>=0.2)", "jsonschema (>=3.2.0)", "numpy (>=1.15,<1.21)", "pydantic (>=1.8.2)", "scikit-learn (==0.24.1)", "scipy (>=1.5.4)"]
 
 [[package]]
 name = "watchdog"
@@ -4261,6 +4336,17 @@ python-versions = ">=3.6"
 [package.extras]
 watchdog = ["watchdog"]
 
+[[package]]
+name = "wheel"
+version = "0.38.4"
+description = "A built-package format for Python"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+
+[package.extras]
+test = ["pytest (>=3.0.0)"]
+
 [[package]]
 name = "wrapt"
 version = "1.12.1"
@@ -4305,8 +4391,8 @@ optional = false
 python-versions = ">=3.6"
 
 [package.extras]
-docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
-testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy"]
+docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"]
+testing = ["func-timeout", "jaraco.itertools", "pytest (>=4.6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"]
 
 [extras]
 cleanrl = ["cleanrl"]
@@ -4315,7 +4401,7 @@ spyder = ["spyder"]
 [metadata]
 lock-version = "1.1"
 python-versions = ">=3.7.1,<3.10"
-content-hash = "5ca5d23e3deb68939d9ddba4fcdaac52e05cc4506cc1a5cd13d4bcf21f73fc17"
+content-hash = "b65fa3abdc9d5b62b2f8a5b4524fbe18e8c679f20c74fccd6633e7f80a877807"
 
 [metadata.files]
 absl-py = [
@@ -4371,10 +4457,13 @@ backcall = [
     {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"},
 ]
 bcrypt = [
+    {file = "bcrypt-3.2.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:b589229207630484aefe5899122fb938a5b017b0f4349f769b8c13e78d99a8fd"},
     {file = "bcrypt-3.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c95d4cbebffafcdd28bd28bb4e25b31c50f6da605c81ffd9ad8a3d1b2ab7b1b6"},
     {file = "bcrypt-3.2.0-cp36-abi3-manylinux1_x86_64.whl", hash = "sha256:63d4e3ff96188e5898779b6057878fecf3f11cfe6ec3b313ea09955d587ec7a7"},
     {file = "bcrypt-3.2.0-cp36-abi3-manylinux2010_x86_64.whl", hash = "sha256:cd1ea2ff3038509ea95f687256c46b79f5fc382ad0aa3664d200047546d511d1"},
     {file = "bcrypt-3.2.0-cp36-abi3-manylinux2014_aarch64.whl", hash = "sha256:cdcdcb3972027f83fe24a48b1e90ea4b584d35f1cc279d76de6fc4b13376239d"},
+    {file = "bcrypt-3.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a0584a92329210fcd75eb8a3250c5a941633f8bfaf2a18f81009b097732839b7"},
+    {file = "bcrypt-3.2.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:56e5da069a76470679f312a7d3d23deb3ac4519991a0361abc11da837087b61d"},
     {file = "bcrypt-3.2.0-cp36-abi3-win32.whl", hash = "sha256:a67fb841b35c28a59cebed05fbd3e80eea26e6d75851f0574a9273c80f3e9b55"},
     {file = "bcrypt-3.2.0-cp36-abi3-win_amd64.whl", hash = "sha256:81fec756feff5b6818ea7ab031205e1d323d8943d237303baca2c5f9c7846f34"},
     {file = "bcrypt-3.2.0.tar.gz", hash = "sha256:5b93c1726e50a93a033c36e5ca7fdcd29a5c7395af50a6892f5d9e7c6cfbfb29"},
@@ -4833,12 +4922,28 @@ markdown = [
     {file = "Markdown-3.3.4.tar.gz", hash = "sha256:31b5b491868dcc87d6c24b7e3d19a0d730d59d3e46f4eea6430a321bed387a49"},
 ]
 markupsafe = [
+    {file = "MarkupSafe-2.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4dc8f9fb58f7364b63fd9f85013b780ef83c11857ae79f2feda41e270468dd9b"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20dca64a3ef2d6e4d5d615a3fd418ad3bde77a47ec8a23d984a12b5b4c74491a"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cdfba22ea2f0029c9261a4bd07e830a8da012291fbe44dc794e488b6c9bb353a"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-win32.whl", hash = "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28"},
+    {file = "MarkupSafe-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f"},
+    {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c"},
+    {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724"},
+    {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145"},
+    {file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:deb993cacb280823246a026e3b2d81c493c53de6acfd5e6bfe31ab3402bb37dd"},
+    {file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:63f3268ba69ace99cab4e3e3b5840b03340efed0948ab8f78d2fd87ee5442a4f"},
+    {file = "MarkupSafe-2.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:8d206346619592c6200148b01a2142798c989edcb9c896f9ac9722a99d4e77e6"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-win32.whl", hash = "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d"},
     {file = "MarkupSafe-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9"},
     {file = "MarkupSafe-2.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567"},
@@ -4847,14 +4952,27 @@ markupsafe = [
     {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f"},
     {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2"},
     {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d"},
+    {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85"},
+    {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6"},
+    {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864"},
+    {file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d6c7ebd4e944c85e2c3421e612a7057a2f48d478d79e61800d81468a8d842207"},
+    {file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f0567c4dc99f264f49fe27da5f735f414c4e7e7dd850cfd8e69f0862d7c74ea9"},
+    {file = "MarkupSafe-2.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:89c687013cb1cd489a0f0ac24febe8c7a666e6e221b783e53ac50ebf68e45d86"},
     {file = "MarkupSafe-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415"},
     {file = "MarkupSafe-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914"},
+    {file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb"},
+    {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b"},
+    {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a"},
+    {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6"},
+    {file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aca6377c0cb8a8253e493c6b451565ac77e98c2951c45f913e0b52facdcff83f"},
+    {file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:04635854b943835a6ea959e948d19dcd311762c5c0c6e1f0e16ee57022669194"},
+    {file = "MarkupSafe-2.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6300b8454aa6930a24b9618fbb54b5a68135092bc666f7b06901f897fa5c2fee"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-win32.whl", hash = "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64"},
     {file = "MarkupSafe-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833"},
     {file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26"},
@@ -4864,6 +4982,12 @@ markupsafe = [
     {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135"},
     {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902"},
     {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509"},
+    {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1"},
+    {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac"},
+    {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6"},
+    {file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4296f2b1ce8c86a6aea78613c34bb1a672ea0e3de9c6ba08a960efe0b0a09047"},
+    {file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f02365d4e99430a12647f09b6cc8bab61a6564363f313126f775eb4f6ef798e"},
+    {file = "MarkupSafe-2.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5b6d930f030f8ed98e3e6c98ffa0652bdb82601e7a016ec2ab5d7ff23baa78d1"},
     {file = "MarkupSafe-2.0.1-cp39-cp39-win32.whl", hash = "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74"},
     {file = "MarkupSafe-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8"},
     {file = "MarkupSafe-2.0.1.tar.gz", hash = "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a"},
@@ -4963,6 +5087,23 @@ numpydoc = [
     {file = "numpydoc-1.1.0-py3-none-any.whl", hash = "sha256:c53d6311190b9e3b9285bc979390ba0257ba9acde5eca1a7065fc8dfca9d46e8"},
     {file = "numpydoc-1.1.0.tar.gz", hash = "sha256:c36fd6cb7ffdc9b4e165a43f67bf6271a7b024d0bb6b00ac468c9e2bfc76448e"},
 ]
+nvidia-cublas-cu11 = [
+    {file = "nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl", hash = "sha256:d32e4d75f94ddfb93ea0a5dda08389bcc65d8916a25cb9f37ac89edaeed3bded"},
+    {file = "nvidia_cublas_cu11-11.10.3.66-py3-none-win_amd64.whl", hash = "sha256:8ac17ba6ade3ed56ab898a036f9ae0756f1e81052a317bf98f8c6d18dc3ae49e"},
+]
+nvidia-cuda-nvrtc-cu11 = [
+    {file = "nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:9f1562822ea264b7e34ed5930567e89242d266448e936b85bc97a3370feabb03"},
+    {file = "nvidia_cuda_nvrtc_cu11-11.7.99-py3-none-manylinux1_x86_64.whl", hash = "sha256:f7d9610d9b7c331fa0da2d1b2858a4a8315e6d49765091d28711c8946e7425e7"},
+    {file = "nvidia_cuda_nvrtc_cu11-11.7.99-py3-none-win_amd64.whl", hash = "sha256:f2effeb1309bdd1b3854fc9b17eaf997808f8b25968ce0c7070945c4265d64a3"},
+]
+nvidia-cuda-runtime-cu11 = [
+    {file = "nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl", hash = "sha256:cc768314ae58d2641f07eac350f40f99dcb35719c4faff4bc458a7cd2b119e31"},
+    {file = "nvidia_cuda_runtime_cu11-11.7.99-py3-none-win_amd64.whl", hash = "sha256:bc77fa59a7679310df9d5c70ab13c4e34c64ae2124dd1efd7e5474b71be125c7"},
+]
+nvidia-cudnn-cu11 = [
+    {file = "nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:402f40adfc6f418f9dae9ab402e773cfed9beae52333f6d86ae3107a1b9527e7"},
+    {file = "nvidia_cudnn_cu11-8.5.0.96-py3-none-manylinux1_x86_64.whl", hash = "sha256:71f8111eb830879ff2836db3cccf03bbd735df9b0d17cd93761732ac50a8a108"},
+]
 oauthlib = [
     {file = "oauthlib-3.1.1-py2.py3-none-any.whl", hash = "sha256:42bf6354c2ed8c6acb54d971fce6f88193d97297e18602a3a886603f9d7730cc"},
     {file = "oauthlib-3.1.1.tar.gz", hash = "sha256:8f0215fcc533dd8dd1bee6f4c412d4f0cd7297307d43ac61666389e3bc3198a3"},
@@ -5004,8 +5145,11 @@ packaging = [
     {file = "packaging-21.2.tar.gz", hash = "sha256:096d689d78ca690e4cd8a89568ba06d07ca097e3306a4381635073ca91479966"},
 ]
 pandas = [
+    {file = "pandas-1.3.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9707bdc1ea9639c886b4d3be6e2a45812c1ac0c2080f94c31b71c9fa35556f9b"},
+    {file = "pandas-1.3.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c2f44425594ae85e119459bb5abb0748d76ef01d9c08583a667e3339e134218e"},
     {file = "pandas-1.3.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:372d72a3d8a5f2dbaf566a5fa5fa7f230842ac80f29a931fb4b071502cf86b9a"},
     {file = "pandas-1.3.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99d2350adb7b6c3f7f8f0e5dfb7d34ff8dd4bc0a53e62c445b7e43e163fce63"},
+    {file = "pandas-1.3.4-cp310-cp310-win_amd64.whl", hash = "sha256:4acc28364863127bca1029fb72228e6f473bb50c32e77155e80b410e2068eeac"},
     {file = "pandas-1.3.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c2646458e1dce44df9f71a01dc65f7e8fa4307f29e5c0f2f92c97f47a5bf22f5"},
     {file = "pandas-1.3.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5298a733e5bfbb761181fd4672c36d0c627320eb999c59c65156c6a90c7e1b4f"},
     {file = "pandas-1.3.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22808afb8f96e2269dcc5b846decacb2f526dd0b47baebc63d913bf847317c8f"},
@@ -5281,6 +5425,8 @@ pyobjc = [
 ]
 pyobjc-core = [
     {file = "pyobjc-core-7.3.tar.gz", hash = "sha256:5081aedf8bb40aac1a8ad95adac9e44e148a882686ded614adf46bb67fd67574"},
+    {file = "pyobjc_core-7.3-1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a1f1e6b457127cbf2b5bd2b94520a7c89fb590b739911eadb2b0499a3a5b0e6f"},
+    {file = "pyobjc_core-7.3-1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:ed708cc47bae8b711f81f252af09898a5f986c7a38cec5ad5623d571d328bff8"},
     {file = "pyobjc_core-7.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4e93ad769a20b908778fe950f62a843a6d8f0fa71996e5f3cc9fab5ae7d17771"},
     {file = "pyobjc_core-7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9f63fd37bbf3785af4ddb2f86cad5ca81c62cfc7d1c0099637ca18343c3656c1"},
     {file = "pyobjc_core-7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e9b1311f72f2e170742a7ee3a8149f52c35158dc024a21e88d6f1e52ba5d718b"},
@@ -5319,6 +5465,8 @@ pyobjc-framework-applescriptobjc = [
 ]
 pyobjc-framework-applicationservices = [
     {file = "pyobjc-framework-ApplicationServices-7.3.tar.gz", hash = "sha256:1925ac30a817e557d1c08450005103bbf76ebd3ff473631fe9875070377b0b4d"},
+    {file = "pyobjc_framework_ApplicationServices-7.3-1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:484e5b5e9f1757ad7e28799bb5d5d59ce861a3e5449f06fc3a0d05b998e9e6bb"},
+    {file = "pyobjc_framework_ApplicationServices-7.3-1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:ec0c07775ff7034751306fa382117d12ae8e383b696cda1b2815dfd334c36ff7"},
     {file = "pyobjc_framework_ApplicationServices-7.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:daa4a9c51a927630fdd3d3f627e03ebc370aee3c397305db85a0a8ba4c28ae93"},
     {file = "pyobjc_framework_ApplicationServices-7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:167aa21ee47b0ee6e4e399915371d183ae84880dc3813c27519e759acb9d20c9"},
     {file = "pyobjc_framework_ApplicationServices-7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7a98f0f1e21465868f9dd32588ae71e5e6a4cb5c434d4158c9e12273fd7b8f27"},
@@ -5381,6 +5529,8 @@ pyobjc-framework-cloudkit = [
 ]
 pyobjc-framework-cocoa = [
     {file = "pyobjc-framework-Cocoa-7.3.tar.gz", hash = "sha256:b18d05e7a795a3455ad191c3e43d6bfa673c2a4fd480bb1ccf57191051b80b7e"},
+    {file = "pyobjc_framework_Cocoa-7.3-1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1e31376806e5de883a1d7c7c87d9ff2a8b09fc05d267e0dfce6e42409fb70c67"},
+    {file = "pyobjc_framework_Cocoa-7.3-1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:d999387927284346035cb63ebb51f86331abc41f9376f9a6970e7f18207db392"},
     {file = "pyobjc_framework_Cocoa-7.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9edffdfa6dd1f71f21b531c3e61fdd3e4d5d3bf6c5a528c98e88828cd60bac11"},
     {file = "pyobjc_framework_Cocoa-7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:35a6340437a4e0109a302150b7d1f6baf57004ccf74834f9e6062fcafe2fd8d7"},
     {file = "pyobjc_framework_Cocoa-7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7c3886f2608ab3ed02482f8b2ebf9f782b324c559e84b52cfd92dba8a1109872"},
@@ -5407,6 +5557,8 @@ pyobjc-framework-contactsui = [
 ]
 pyobjc-framework-coreaudio = [
     {file = "pyobjc-framework-CoreAudio-7.3.tar.gz", hash = "sha256:37d161dc459ba309fa5f46655662cd63ff850b5edddde463c58594bdf4b4dee4"},
+    {file = "pyobjc_framework_CoreAudio-7.3-1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:15afd08639ee05b8b2924f63a54bea3e7893eda0efeda0debc94859e88db943a"},
+    {file = "pyobjc_framework_CoreAudio-7.3-1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:2f74767f25a0dfdab0ccaeaac2a971fde1b5d4035d2314a60ed149f9e32b8401"},
     {file = "pyobjc_framework_CoreAudio-7.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:11876f4eb434a492674f8b61a5e9ebd6d9f5bc5ba49a2dd56e5e8dcfee92138f"},
     {file = "pyobjc_framework_CoreAudio-7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b4fec7d9be1c094caefea862782960d7ee4e33526e31896fee5838b8fe95d01f"},
     {file = "pyobjc_framework_CoreAudio-7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f2ef8d2b1a2aab20f12fe978adcdcc21f5c83f5c0374e0cc908e13c49d3a2d69"},
@@ -5439,6 +5591,8 @@ pyobjc-framework-corelocation = [
 ]
 pyobjc-framework-coremedia = [
     {file = "pyobjc-framework-CoreMedia-7.3.tar.gz", hash = "sha256:c95a09979709241e50a2b000f6772751fed99850f1aaa2cacafd039f3a6b3e99"},
+    {file = "pyobjc_framework_CoreMedia-7.3-1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f0afa7868bb5225e1acb3c4b5dd2315b866d4b6735f81ef315ac2ca0a985fc0b"},
+    {file = "pyobjc_framework_CoreMedia-7.3-1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:122820c9fc66deda73887c6799c03236fc42fe1445eee6b9c4b28e81d94a3fe7"},
     {file = "pyobjc_framework_CoreMedia-7.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:38a86c24e337b895fa4832323085f2cc84fb5bffaf1c6c4f54173e9774d4017d"},
     {file = "pyobjc_framework_CoreMedia-7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:53e874af4c5cedcbb21a52b6482f98afb402798062326efef0f08de37f7af002"},
     {file = "pyobjc_framework_CoreMedia-7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2b911875f1630da6d918c02ca5617e8b5692cc2c8a222874befea19c453fcad7"},
@@ -5476,6 +5630,8 @@ pyobjc-framework-corespotlight = [
 ]
 pyobjc-framework-coretext = [
     {file = "pyobjc-framework-CoreText-7.3.tar.gz", hash = "sha256:5b5fc91bcbd2fe5199f6b65971d62bea02f942c76d6acb59168c041c7af435d9"},
+    {file = "pyobjc_framework_CoreText-7.3-1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ea87b8409d247d0d9968657f36938c62c47369f65ea1094d96b5f6db87c8db0f"},
+    {file = "pyobjc_framework_CoreText-7.3-1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:e1bfadf9e0059c0866c3a4783ab30597aa975dcf5386ba3dfb24d032aa3cf084"},
     {file = "pyobjc_framework_CoreText-7.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a3ae27d5756b9d62d113e7c4f12022f8812bc95bc277f920f0fe2ca45b5272be"},
     {file = "pyobjc_framework_CoreText-7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fd3f11b3358cbf5a56d4a01e736322daa5b6ce6e3701d41cc9eafcede0267faa"},
     {file = "pyobjc_framework_CoreText-7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d290880894256497425a1abd116076e7a74c8b92a2b1b27d899ede16f52e5f20"},
@@ -5536,6 +5692,8 @@ pyobjc-framework-externalaccessory = [
 ]
 pyobjc-framework-fileprovider = [
     {file = "pyobjc-framework-FileProvider-7.3.tar.gz", hash = "sha256:cec94c9e2eef09e624834a358da7c0827938eb0825c2804b09a2bf20858a6615"},
+    {file = "pyobjc_framework_FileProvider-7.3-1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ede612a7eaa0bfd39c6e3e68f6d6c7efab3f6f0565f45b90a21f2de7db101d24"},
+    {file = "pyobjc_framework_FileProvider-7.3-1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:393f90a8a3e445aa829d859e7d29322ac82d2a588c731cc7b6b10c4152e7dc84"},
     {file = "pyobjc_framework_FileProvider-7.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:55537492938356fb0d2034327d39e84c46b2e7340b923177ba249baf0ce43b38"},
     {file = "pyobjc_framework_FileProvider-7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a427f2f560b8568f7992467b4f8acde9dd9de716979a71484cea06ad9e3465ef"},
     {file = "pyobjc_framework_FileProvider-7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:370dfde069494818f077ff06ee02cdedc9c3733add6df5336800c5f14c6519ff"},
@@ -5629,6 +5787,8 @@ pyobjc-framework-launchservices = [
 ]
 pyobjc-framework-libdispatch = [
     {file = "pyobjc-framework-libdispatch-7.3.tar.gz", hash = "sha256:c3e63ce294e50a36c17bc9e65ccf3e448995931fc10fc0c15f899d27c438e25f"},
+    {file = "pyobjc_framework_libdispatch-7.3-1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e945cda52619d53435fbbdccc63d195987bccfdc6abc59b12caf0c16852d6a45"},
+    {file = "pyobjc_framework_libdispatch-7.3-1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:c03d0885d321c813ce0c3cfda17cb531cd71f005adbfd402275bc4f6ba8d6319"},
     {file = "pyobjc_framework_libdispatch-7.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2262ab83c6236d168c4e595ecdb1973c1f845dd0dc21840f4a8ce6f900d7e357"},
     {file = "pyobjc_framework_libdispatch-7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:57acf9136ad53794c2303dc0f055491270cf70b85f89cef79493c09b60f60f59"},
     {file = "pyobjc_framework_libdispatch-7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:be032ce5d05ac23e82c18a6d25459b45ceac3f3073949a439584a99fc26582a5"},
@@ -5772,6 +5932,8 @@ pyobjc-framework-pushkit = [
 ]
 pyobjc-framework-quartz = [
     {file = "pyobjc-framework-Quartz-7.3.tar.gz", hash = "sha256:98812844c34262def980bdf60923a875cd43428a8375b6fd53bd2cd800eccf0b"},
+    {file = "pyobjc_framework_Quartz-7.3-1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1139bc6874c0f8b58f0b8602015e0994198bc506a6bcec1071208de32b55ed26"},
+    {file = "pyobjc_framework_Quartz-7.3-1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:d94a3ed7051266c52392ec07d3b5adbf28d4be83341a24df0d88639344dcd84f"},
     {file = "pyobjc_framework_Quartz-7.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1ef18f5a16511ded65980bf4f5983ea5d35c88224dbad1b3112abd29c60413ea"},
     {file = "pyobjc_framework_Quartz-7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3b41eec8d4b10c7c7e011e2f9051367f5499ef315ba52dfbae573c3a2e05469c"},
     {file = "pyobjc_framework_Quartz-7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c65456ed045dfe1711d0298734e5a3ad670f8c770f7eb3b19979256c388bdd2"},
@@ -5817,6 +5979,8 @@ pyobjc-framework-searchkit = [
 ]
 pyobjc-framework-security = [
     {file = "pyobjc-framework-Security-7.3.tar.gz", hash = "sha256:4109ab15faf2dcf89646330a4f0a6584410d7134418fae0814858cab4ab76347"},
+    {file = "pyobjc_framework_Security-7.3-1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:57ef7656f01bfdd1dfddc3493e90abc74910379bd9319f764d1ac09fc7c470dc"},
+    {file = "pyobjc_framework_Security-7.3-1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:1470400a48cc7e1eab48e67611cd1c551c28d3c39ef4f7ece3a35c52b6694c43"},
     {file = "pyobjc_framework_Security-7.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1a048e42ddca426ac02838e8f4093f138b1fd88f9de8c3c5f087fbaa60cd1987"},
     {file = "pyobjc_framework_Security-7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:77d0c4b68d1409719f7bc023719b1f1a066e48f270bcfedb700b342523b191be"},
     {file = "pyobjc_framework_Security-7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:379287b0a01abc047bee7d2632fa34f038faa8f566c3de6978141bfcf3fc6128"},
@@ -5926,6 +6090,10 @@ pyqt5 = [
     {file = "PyQt5-5.12.3-5.12.10-cp35.cp36.cp37.cp38.cp39-none-win_amd64.whl", hash = "sha256:24b2c60644caae136f92dfe21c6a071badb121a6410f3f73760e70c9f3459dcf"},
 ]
 pyqt5-sip = [
+    {file = "PyQt5_sip-12.9.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6d5bca2fc222d58e8093ee8a81a6e3437067bb22bc3f86d06ec8be721e15e90a"},
+    {file = "PyQt5_sip-12.9.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:d59af63120d1475b2bf94fe8062610720a9be1e8940ea146c7f42bb449d49067"},
+    {file = "PyQt5_sip-12.9.0-cp310-cp310-win32.whl", hash = "sha256:0fc9aefacf502696710b36cdc9fa2a61487f55ee883dbcf2c2a6477e261546f7"},
+    {file = "PyQt5_sip-12.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:485972daff2fb0311013f471998f8ec8262ea381bded244f9d14edaad5f54271"},
     {file = "PyQt5_sip-12.9.0-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:d85002238b5180bce4b245c13d6face848faa1a7a9e5c6e292025004f2fd619a"},
     {file = "PyQt5_sip-12.9.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:83c3220b1ca36eb8623ba2eb3766637b19eb0ce9f42336ad8253656d32750c0a"},
     {file = "PyQt5_sip-12.9.0-cp36-cp36m-win32.whl", hash = "sha256:d8b2bdff7bbf45bc975c113a03b14fd669dc0c73e1327f02706666a7dd51a197"},
@@ -6058,24 +6226,32 @@ pyzmq = [
     {file = "pyzmq-22.3.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f89468059ebc519a7acde1ee50b779019535db8dcf9b8c162ef669257fef7a93"},
     {file = "pyzmq-22.3.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea12133df25e3a6918718fbb9a510c6ee5d3fdd5a346320421aac3882f4feeea"},
     {file = "pyzmq-22.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76c532fd68b93998aab92356be280deec5de8f8fe59cd28763d2cc8a58747b7f"},
+    {file = "pyzmq-22.3.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f907c7359ce8bf7f7e63c82f75ad0223384105f5126f313400b7e8004d9b33c3"},
+    {file = "pyzmq-22.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:902319cfe23366595d3fa769b5b751e6ee6750a0a64c5d9f757d624b2ac3519e"},
     {file = "pyzmq-22.3.0-cp310-cp310-win32.whl", hash = "sha256:67db33bea0a29d03e6eeec55a8190e033318cee3cbc732ba8fd939617cbf762d"},
     {file = "pyzmq-22.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:7661fc1d5cb73481cf710a1418a4e1e301ed7d5d924f91c67ba84b2a1b89defd"},
     {file = "pyzmq-22.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79244b9e97948eaf38695f4b8e6fc63b14b78cc37f403c6642ba555517ac1268"},
     {file = "pyzmq-22.3.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab888624ed68930442a3f3b0b921ad7439c51ba122dbc8c386e6487a658e4a4e"},
     {file = "pyzmq-22.3.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:18cd854b423fce44951c3a4d3e686bac8f1243d954f579e120a1714096637cc0"},
     {file = "pyzmq-22.3.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:de8df0684398bd74ad160afdc2a118ca28384ac6f5e234eb0508858d8d2d9364"},
+    {file = "pyzmq-22.3.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:62bcade20813796c426409a3e7423862d50ff0639f5a2a95be4b85b09a618666"},
+    {file = "pyzmq-22.3.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:ea5a79e808baef98c48c884effce05c31a0698c1057de8fc1c688891043c1ce1"},
     {file = "pyzmq-22.3.0-cp36-cp36m-win32.whl", hash = "sha256:3c1895c95be92600233e476fe283f042e71cf8f0b938aabf21b7aafa62a8dac9"},
     {file = "pyzmq-22.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:851977788b9caa8ed011f5f643d3ee8653af02c5fc723fa350db5125abf2be7b"},
     {file = "pyzmq-22.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b4ebed0977f92320f6686c96e9e8dd29eed199eb8d066936bac991afc37cbb70"},
     {file = "pyzmq-22.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42abddebe2c6a35180ca549fadc7228d23c1e1f76167c5ebc8a936b5804ea2df"},
     {file = "pyzmq-22.3.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1e41b32d6f7f9c26bc731a8b529ff592f31fc8b6ef2be9fa74abd05c8a342d7"},
     {file = "pyzmq-22.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:be4e0f229cf3a71f9ecd633566bd6f80d9fa6afaaff5489492be63fe459ef98c"},
+    {file = "pyzmq-22.3.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:08c4e315a76ef26eb833511ebf3fa87d182152adf43dedee8d79f998a2162a0b"},
+    {file = "pyzmq-22.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:badb868fff14cfd0e200eaa845887b1011146a7d26d579aaa7f966c203736b92"},
     {file = "pyzmq-22.3.0-cp37-cp37m-win32.whl", hash = "sha256:7c58f598d9fcc52772b89a92d72bf8829c12d09746a6d2c724c5b30076c1f11d"},
     {file = "pyzmq-22.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2b97502c16a5ec611cd52410bdfaab264997c627a46b0f98d3f666227fd1ea2d"},
     {file = "pyzmq-22.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d728b08448e5ac3e4d886b165385a262883c34b84a7fe1166277fe675e1c197a"},
     {file = "pyzmq-22.3.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:480b9931bfb08bf8b094edd4836271d4d6b44150da051547d8c7113bf947a8b0"},
     {file = "pyzmq-22.3.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7dc09198e4073e6015d9a8ea093fc348d4e59de49382476940c3dd9ae156fba8"},
     {file = "pyzmq-22.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ca6cd58f62a2751728016d40082008d3b3412a7f28ddfb4a2f0d3c130f69e74"},
+    {file = "pyzmq-22.3.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:468bd59a588e276961a918a3060948ae68f6ff5a7fa10bb2f9160c18fe341067"},
+    {file = "pyzmq-22.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c88fa7410e9fc471e0858638f403739ee869924dd8e4ae26748496466e27ac59"},
     {file = "pyzmq-22.3.0-cp38-cp38-win32.whl", hash = "sha256:c0f84360dcca3481e8674393bdf931f9f10470988f87311b19d23cda869bb6b7"},
     {file = "pyzmq-22.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:f762442bab706fd874064ca218b33a1d8e40d4938e96c24dafd9b12e28017f45"},
     {file = "pyzmq-22.3.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:954e73c9cd4d6ae319f1c936ad159072b6d356a92dcbbabfd6e6204b9a79d356"},
@@ -6083,6 +6259,8 @@ pyzmq = [
     {file = "pyzmq-22.3.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:acebba1a23fb9d72b42471c3771b6f2f18dcd46df77482612054bd45c07dfa36"},
     {file = "pyzmq-22.3.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cf98fd7a6c8aaa08dbc699ffae33fd71175696d78028281bc7b832b26f00ca57"},
     {file = "pyzmq-22.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d072f7dfbdb184f0786d63bda26e8a0882041b1e393fbe98940395f7fab4c5e2"},
+    {file = "pyzmq-22.3.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:53f4fd13976789ffafedd4d46f954c7bb01146121812b72b4ddca286034df966"},
+    {file = "pyzmq-22.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d1b5d457acbadcf8b27561deeaa386b0217f47626b29672fa7bd31deb6e91e1b"},
     {file = "pyzmq-22.3.0-cp39-cp39-win32.whl", hash = "sha256:e6a02cf7271ee94674a44f4e62aa061d2d049001c844657740e156596298b70b"},
     {file = "pyzmq-22.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:d3dcb5548ead4f1123851a5ced467791f6986d68c656bc63bfff1bf9e36671e2"},
     {file = "pyzmq-22.3.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3a4c9886d61d386b2b493377d980f502186cd71d501fffdba52bd2a0880cef4f"},
@@ -6168,7 +6346,6 @@ requests = [
 requests-oauthlib = [
     {file = "requests-oauthlib-1.3.0.tar.gz", hash = "sha256:b4261601a71fd721a8bd6d7aa1cc1d6a8a93b4a9f5e96626f8e4d91e8beeaa6a"},
     {file = "requests_oauthlib-1.3.0-py2.py3-none-any.whl", hash = "sha256:7f71572defaecd16372f9006f33c2ec8c077c3cfa6f5911a9a90202beb513f3d"},
-    {file = "requests_oauthlib-1.3.0-py3.7.egg", hash = "sha256:fa6c47b933f01060936d87ae9327fead68768b69c6c9ea2109c48be30f2d4dbc"},
 ]
 rope = [
     {file = "rope-0.21.0.tar.gz", hash = "sha256:366789e069a267296889b2ee7631f9278173b5e7d468f2ea08abe26069a52aef"},
@@ -6218,6 +6395,10 @@ sentry-sdk = [
     {file = "sentry-sdk-1.4.3.tar.gz", hash = "sha256:b9844751e40710e84a457c5bc29b21c383ccb2b63d76eeaad72f7f1c808c8828"},
     {file = "sentry_sdk-1.4.3-py2.py3-none-any.whl", hash = "sha256:c091cc7115ff25fe3a0e410dbecd7a996f81a3f6137d2272daef32d6c3cfa6dc"},
 ]
+setuptools = [
+    {file = "setuptools-65.5.1-py3-none-any.whl", hash = "sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31"},
+    {file = "setuptools-65.5.1.tar.gz", hash = "sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f"},
+]
 shortuuid = [
     {file = "shortuuid-1.0.1-py3-none-any.whl", hash = "sha256:492c7402ff91beb1342a5898bd61ea953985bf24a41cd9f247409aa2e03c8f77"},
     {file = "shortuuid-1.0.1.tar.gz", hash = "sha256:3c11d2007b915c43bee3e10625f068d8a349e04f0d81f08f5fa08507427ebf1f"},
@@ -6267,6 +6448,7 @@ sphinxcontrib-serializinghtml = [
     {file = "sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl", hash = "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd"},
 ]
 spyder = [
+    {file = "spyder-5.1.5-1-py3-none-any.whl", hash = "sha256:24b49722f7f3c724dbbf0b74bfad050e31bbdb635fb46782fd1aaa1dc615c762"},
     {file = "spyder-5.1.5-py3-none-any.whl", hash = "sha256:ab6b607e91d3bef5c77bede33ad6b72538ef407163c6644640daeadbd6713c06"},
     {file = "spyder-5.1.5.tar.gz", hash = "sha256:f9ce23187f5de18f489fb03c38d884e4484b9c7292f9831faaa029fb7752fc93"},
 ]
@@ -6330,18 +6512,27 @@ tomlkit = [
     {file = "tomlkit-0.7.2.tar.gz", hash = "sha256:d7a454f319a7e9bd2e249f239168729327e4dd2d27b17dc68be264ad1ce36754"},
 ]
 torch = [
-    {file = "torch-1.7.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:422e64e98d0e100c360993819d0307e5d56e9517b26135808ad68984d577d75a"},
-    {file = "torch-1.7.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f0aaf657145533824b15f2fd8fde8f8c67fe6c6281088ef588091f03fad90243"},
-    {file = "torch-1.7.1-cp36-none-macosx_10_9_x86_64.whl", hash = "sha256:af464a6f4314a875035e0c4c2b07517599704b214634f4ed3ad2e748c5ef291f"},
-    {file = "torch-1.7.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5d76c255a41484c1d41a9ff570b9c9f36cb85df9428aa15a58ae16ac7cfc2ea6"},
-    {file = "torch-1.7.1-cp37-cp37m-win_amd64.whl", hash = "sha256:d241c3f1c4d563e4ba86f84769c23e12606db167ee6f674eedff6d02901462e3"},
-    {file = "torch-1.7.1-cp37-none-macosx_10_9_x86_64.whl", hash = "sha256:de84b4166e3f7335eb868b51d3bbd909ec33828af27290b4171bce832a55be3c"},
-    {file = "torch-1.7.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:dd2fc6880c95e836960d86efbbc7f63d3287f2e1893c51d31f96dbfe02f0d73e"},
-    {file = "torch-1.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:e000b94be3aa58ad7f61e7d07cf379ea9366cf6c6874e68bd58ad0bdc537b3a7"},
-    {file = "torch-1.7.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:2e49cac969976be63117004ee00d0a3e3dd4ea662ad77383f671b8992825de1a"},
-    {file = "torch-1.7.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:a3793dcceb12b1e2281290cca1277c5ce86ddfd5bf044f654285a4d69057aea7"},
-    {file = "torch-1.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:6652a767a0572ae0feb74ad128758e507afd3b8396b6e7f147e438ba8d4c6f63"},
-    {file = "torch-1.7.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:38d67f4fb189a92a977b2c0a38e4f6dd413e0bf55aa6d40004696df7e40a71ff"},
+    {file = "torch-1.13.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:f68edfea71ade3862039ba66bcedf954190a2db03b0c41a9b79afd72210abd97"},
+    {file = "torch-1.13.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d2d2753519415d154de4d3e64d2eaaeefdba6b6fd7d69d5ffaef595988117700"},
+    {file = "torch-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:6c227c16626e4ce766cca5351cc62a2358a11e8e466410a298487b9dff159eb1"},
+    {file = "torch-1.13.0-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:49a949b8136b32b2ec0724cbf4c6678b54e974b7d68f19f1231eea21cde5c23b"},
+    {file = "torch-1.13.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:0fdd38c96230947b1ed870fed4a560252f8d23c3a2bf4dab9d2d42b18f2e67c8"},
+    {file = "torch-1.13.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:43db0723fc66ad6486f86dc4890c497937f7cd27429f28f73fb7e4d74b7482e2"},
+    {file = "torch-1.13.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e643ac8d086706e82f77b5d4dfcf145a9dd37b69e03e64177fc23821754d2ed7"},
+    {file = "torch-1.13.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:bb33a911460475d1594a8c8cb73f58c08293211760796d99cae8c2509b86d7f1"},
+    {file = "torch-1.13.0-cp37-cp37m-win_amd64.whl", hash = "sha256:220325d0f4e69ee9edf00c04208244ef7cf22ebce083815ce272c7491f0603f5"},
+    {file = "torch-1.13.0-cp37-none-macosx_10_9_x86_64.whl", hash = "sha256:cd1e67db6575e1b173a626077a54e4911133178557aac50683db03a34e2b636a"},
+    {file = "torch-1.13.0-cp37-none-macosx_11_0_arm64.whl", hash = "sha256:9197ec216833b836b67e4d68e513d31fb38d9789d7cd998a08fba5b499c38454"},
+    {file = "torch-1.13.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:fa768432ce4b8ffa29184c79a3376ab3de4a57b302cdf3c026a6be4c5a8ab75b"},
+    {file = "torch-1.13.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:635dbb99d981a6483ca533b3dc7be18ef08dd9e1e96fb0bb0e6a99d79e85a130"},
+    {file = "torch-1.13.0-cp38-cp38-win_amd64.whl", hash = "sha256:857c7d5b1624c5fd979f66d2b074765733dba3f5e1cc97b7d6909155a2aae3ce"},
+    {file = "torch-1.13.0-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:ef934a21da6f6a516d0a9c712a80d09c56128abdc6af8dc151bee5199b4c3b4e"},
+    {file = "torch-1.13.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:f01a9ae0d4b69d2fc4145e8beab45b7877342dddbd4838a7d3c11ca7f6680745"},
+    {file = "torch-1.13.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:9ac382cedaf2f70afea41380ad8e7c06acef6b5b7e2aef3971cdad666ca6e185"},
+    {file = "torch-1.13.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:e20df14d874b024851c58e8bb3846249cb120e677f7463f60c986e3661f88680"},
+    {file = "torch-1.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:4a378f5091307381abfb30eb821174e12986f39b1cf7c4522bf99155256819eb"},
+    {file = "torch-1.13.0-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:922a4910613b310fbeb87707f00cb76fec328eb60cc1349ed2173e7c9b6edcd8"},
+    {file = "torch-1.13.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:47fe6228386bff6d74319a2ffe9d4ed943e6e85473d78e80502518c607d644d2"},
 ]
 tornado = [
     {file = "tornado-6.1-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:d371e811d6b156d82aa5f9a4e08b58debf97c302a35714f6f45e35139c332e32"},
@@ -6525,6 +6716,10 @@ werkzeug = [
     {file = "Werkzeug-2.0.2-py3-none-any.whl", hash = "sha256:63d3dc1cf60e7b7e35e97fa9861f7397283b75d765afcaefd993d6046899de8f"},
     {file = "Werkzeug-2.0.2.tar.gz", hash = "sha256:aa2bb6fc8dee8d6c504c0ac1e7f5f7dc5810a9903e793b6f715a9f015bdadb9a"},
 ]
+wheel = [
+    {file = "wheel-0.38.4-py3-none-any.whl", hash = "sha256:b60533f3f5d530e971d6737ca6d58681ee434818fab630c83a734bb10c083ce8"},
+    {file = "wheel-0.38.4.tar.gz", hash = "sha256:965f5259b566725405b05e7cf774052044b1ed30119b5d586b2703aafe8719ac"},
+]
 wrapt = [
     {file = "wrapt-1.12.1.tar.gz", hash = "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7"},
 ]
diff --git a/pyproject.toml b/pyproject.toml
index ecf15a50..8e85c6d4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,13 +20,13 @@ spyder = {version = "^5.1.5", optional = true}
 cleanrl = {version = "0.5.0.dev6", optional = true, extras = ["cloud"]}
 PettingZoo = "^1.16.0"
 
-[tool.poetry.dev-dependencies]
+[tool.poetry.group.dev.dependencies]
 poetry-dynamic-versioning = "^0.13.0"
 tensorboard = "^2.5.0"
 wandb = "^0.12.1"
 trueskill = "^0.4.5"
 stable-baselines3 = "^1.1.0"
-torch = "1.7.1"
+torch = "^1.13.0"
 pytest = "^6.2.5"
 pre-commit = "^2.17.0"
 

From 7d78a360dc72615b3ee614bf53cbb3b2e15708f9 Mon Sep 17 00:00:00 2001
From: Costa Huang <costa.huang@outlook.com>
Date: Wed, 16 Nov 2022 17:46:26 -0500
Subject: [PATCH 06/10] pre-commit

---
 experiments/ppo_gridnet_multigpu.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/experiments/ppo_gridnet_multigpu.py b/experiments/ppo_gridnet_multigpu.py
index 3ef66c9d..0f3c71bc 100644
--- a/experiments/ppo_gridnet_multigpu.py
+++ b/experiments/ppo_gridnet_multigpu.py
@@ -12,8 +12,8 @@
 import numpy as np
 import pandas as pd
 import torch
-import torch.nn as nn
 import torch.distributed as dist
+import torch.nn as nn
 import torch.optim as optim
 from gym.spaces import MultiDiscrete
 from stable_baselines3.common.vec_env import VecEnvWrapper, VecMonitor, VecVideoRecorder
@@ -386,9 +386,7 @@ def on_evaluation_done(self, future):
     envs = MicroRTSStatsRecorder(envs, args.gamma)
     envs = VecMonitor(envs)
     if args.capture_video:
-        envs = VecVideoRecorder(
-            envs, f"videos/{run_name}", record_video_trigger=lambda x: x % 100000 == 0, video_length=2000
-        )
+        envs = VecVideoRecorder(envs, f"videos/{run_name}", record_video_trigger=lambda x: x % 100000 == 0, video_length=2000)
     assert isinstance(envs.action_space, MultiDiscrete), "only MultiDiscrete action space is supported"
 
     eval_executor = None
@@ -487,7 +485,7 @@ def on_evaluation_done(self, future):
         print(
             f"local_rank: {local_rank}, action.sum(): {action.sum()}, update: {update}, agent.actor.weight.sum(): {list(agent.actor)[0].weight.sum()}"
         )
-        training_time_start =  time.time()
+        training_time_start = time.time()
         # bootstrap reward if not done. reached the batch limit
         with torch.no_grad():
             last_value = agent.get_value(next_obs).reshape(1, -1)

From 739dd2f2c6f5c44a958270841edb37940e663c30 Mon Sep 17 00:00:00 2001
From: Costa Huang <costa.huang@outlook.com>
Date: Wed, 16 Nov 2022 17:52:02 -0500
Subject: [PATCH 07/10] update CI

---
 .github/workflows/pypi.yml  | 10 +++++-----
 .github/workflows/tests.yml |  3 ---
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
index 4625d8e3..c2774176 100644
--- a/.github/workflows/pypi.yml
+++ b/.github/workflows/pypi.yml
@@ -25,12 +25,12 @@ jobs:
         java-version: '8.x' # The JDK version to make available on the path.
         java-package: jdk # (jre, jdk, or jdk+fx) - defaults to jdk
         architecture: x64 # (x64 or x86) - defaults to x64
-    - name: Build microrts
-      run: bash build.sh
-    - name: Run image
-      uses: abatilo/actions-poetry@v2.0.0
+    - name: Install Poetry
+      uses: snok/install-poetry@v1
       with:
-        poetry-version: 1.1.7
+        virtualenvs-create: true
+        virtualenvs-in-project: true
+        installer-parallel: true
     - name: Build a source tarball
       run: pip install poetry-dynamic-versioning && poetry install --no-dev && poetry build --format sdist
     - name: Upload artifact to S3
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c3b44c65..2d0d14a9 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -30,9 +30,6 @@ jobs:
           java-package: jdk # (jre, jdk, or jdk+fx) - defaults to jdk
           architecture: x64 # (x64 or x86) - defaults to x64
 
-      - name: Build microrts
-        run: bash build.sh
-
       - name: Install Poetry
         uses: snok/install-poetry@v1
         with:

From 2be8b4e263ad02f5af95b137b02b96219f60bd47 Mon Sep 17 00:00:00 2001
From: Costa Huang <costa.huang@outlook.com>
Date: Wed, 16 Nov 2022 17:55:55 -0500
Subject: [PATCH 08/10] update microrts version

---
 gym_microrts/microrts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gym_microrts/microrts b/gym_microrts/microrts
index 943912d5..67664cb0 160000
--- a/gym_microrts/microrts
+++ b/gym_microrts/microrts
@@ -1 +1 @@
-Subproject commit 943912d565791fa7b3fc2f58eb612487a8508e6b
+Subproject commit 67664cb0f19d9bd1177d09dbea064dfd9b878ce7

From 413c419f0ea58b8b3f09d67477d76fb79f571d73 Mon Sep 17 00:00:00 2001
From: Costa Huang <costa.huang@outlook.com>
Date: Wed, 16 Nov 2022 18:07:33 -0500
Subject: [PATCH 09/10] bug fix

---
 gym_microrts/envs/vec_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gym_microrts/envs/vec_env.py b/gym_microrts/envs/vec_env.py
index 075b8e4c..3b254859 100644
--- a/gym_microrts/envs/vec_env.py
+++ b/gym_microrts/envs/vec_env.py
@@ -237,7 +237,7 @@ def step_wait(self):
                     if d and done_idx % 2 == 0:
                         done_idx -= self.num_bot_envs  # recalibrate the index
                         self.vec_client.selfPlayClients[done_idx // 2].mapPath = next(self.next_map)
-                        self.vec_client.selfPlayClients[done_idx // 2].reset(0)
+                        self.vec_client.selfPlayClients[done_idx // 2].reset()
                         p0_response = self.vec_client.selfPlayClients[done_idx // 2].getResponse(0)
                         p1_response = self.vec_client.selfPlayClients[done_idx // 2].getResponse(1)
                         obs[done_idx] = self._encode_obs(np.array(p0_response.observation))

From c95730185efc782fd49f1a3bf2c6bd99c6494ae2 Mon Sep 17 00:00:00 2001
From: Costa Huang <costa.huang@outlook.com>
Date: Wed, 16 Nov 2022 18:27:34 -0500
Subject: [PATCH 10/10] update docs

---
 README.md | 45 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 090e28db..4de0e3ac 100644
--- a/README.md
+++ b/README.md
@@ -64,30 +64,49 @@ Note that the experiments in the technical paper above are done with [`gym_micro
 
 Here is a description of Gym-μRTS's observation and action space:
 
-* **Observation Space.** (`Box(0, 1, (h, w, 27), int32)`) Given a map of size `h x w`, the observation is a tensor of shape `(h, w, n_f)`, where `n_f` is a number of feature planes that have binary values. The observation space used in this paper uses 27 feature planes as shown in the following table. A feature plane can be thought of as a concatenation of multiple one-hot encoded features. As an example, if there is a worker with hit points equal to 1, not carrying any resources, owner being Player 1, and currently not executing any actions, then the one-hot encoding features will look like the following:
-
-   `[0,1,0,0,0],  [1,0,0,0,0],  [1,0,0], [0,0,0,0,1,0,0,0],  [1,0,0,0,0,0]`
-
-
-    The 27 values of each feature plane for the position in the map of such worker will thus be:
-
-    `[0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0]`
-
-* **Partial Observation Space.** (`Box(0, 1, (h, w, 29), int32)`) Given a map of size `h x w`, the observation is a tensor of shape `(h, w, n_f)`, where `n_f` is a number of feature planes that have binary values. The observation space for partial observability uses 29 feature planes as shown in the following table. A feature plane can be thought of as a concatenation of multiple one-hot encoded features. As an example, if there is a worker with hit points equal to 1, not carrying any resources, owner being Player 1,  currently not executing any actions, and not visible to the opponent, then the one-hot encoding features will look like the following:
-
-   `[0,1,0,0,0],  [1,0,0,0,0],  [1,0,0], [0,0,0,0,1,0,0,0],  [1,0,0,0,0,0], [1,0]`
+* **Observation Space.** (`Box(0, 1, (h, w, 29), int32)`) Given a map of size `h x w`, the observation is a tensor of shape `(h, w, n_f)`, where `n_f` is the number of feature planes that have binary values. The observation space used in this paper uses 29 feature planes as shown in the following table. A feature plane can be thought of as a concatenation of multiple one-hot encoded features. As an example, the unit at a cell could be encoded as follows:
 
+    * the unit has 1 hit point -> `[0,1,0,0,0]`
+    * the unit is not carrying any resources -> `[1,0,0,0,0]`
+    * the unit is owned by Player 1 -> `[0,1,0]`
+    * the unit is a worker -> `[0,0,0,0,1,0,0,0]`
+    * the unit is not executing any actions -> `[1,0,0,0,0,0]`
+    * the unit is standing on a free terrain cell -> `[1,0]`
 
     The 29 values of each feature plane for the position in the map of such worker will thus be:
 
     `[0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0]`
 
+* **Partial Observation Space.** (`Box(0, 1, (h, w, 31), int32)`) Under the partial observation space, there are two additional planes indicating whether the unit is visible to the opponent. For example, if the unit is visible to the opponent, the feature plane will be `[0,1]`. If the unit is not visible to the opponent, the feature plane will be `[1,0]`. Using the example above and assuming the worker unit is not visible, the 31 values of each feature plane for the position in the map of such a worker will thus be:
+
+    `[0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0]`
+
 * **Action Space.** (`MultiDiscrete(concat(h * w * [[6   4   4   4   4   7 a_r]]))`) Given a map of size `h x w` and the maximum attack range `a_r=7`, the action is an (7hw)-dimensional vector of discrete values as specified in the following table. The first 7 component of the action vector represents the actions issued to the unit at `x=0,y=0`, and the second 7 component represents actions issued to the unit at `x=0,y=1`, etc. In these 7 components, the first component is the action type, and the rest of components represent the different parameters different action types can take. Depending on which action type is selected, the game engine will use the corresponding parameters to execute the action. As an example, if the RL agent issues a move south action to the worker at $x=0, y=1$ in a 2x2 map, the action will be encoded in the following way:
 
     `concat([0,0,0,0,0,0,0], [1,2,0,0,0,0,0], [0,0,0,0,0,0,0], [0,0,0,0,0,0,0]]`
     `=[0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]`
 
-![image](https://user-images.githubusercontent.com/5555347/120344517-a5bf7300-c2c7-11eb-81b6-172813ba8a0b.png)
+<!-- ![image](https://user-images.githubusercontent.com/5555347/120344517-a5bf7300-c2c7-11eb-81b6-172813ba8a0b.png) -->
+
+Here is a table summarizing observation features and action components, where $a_r=7$ is the maximum attack range and `-` means not applicable.
+
+| Observation Features        | Planes             | Description                                              |
+|-----------------------------|--------------------|----------------------------------------------------------|
+| Hit Points                  | 5                  | 0, 1, 2, 3, $\geq 4$                                     |
+| Resources                   | 5                  | 0, 1, 2, 3, $\geq 4$                                     |
+| Owner                       | 3                  | -, player 1, player 2                                    |
+| Unit Types                  | 8                  | -, resource, base, barrack, worker, light, heavy, ranged |
+| Current Action              | 6                  | -, move, harvest, return, produce, attack                |
+| Action Components           | Range              | Description                                              |
+| Source Unit                 | $[0,h \times w-1]$ | the location of the unit selected to perform an action   |
+| Action Type                 | $[0,5]$            | NOOP, move, harvest, return, produce, attack             |
+| Move Parameter              | $[0,3]$            | north, east, south, west                                 |
+| Harvest Parameter           | $[0,3]$            | north, east, south, west                                 |
+| Return Parameter            | $[0,3]$            | north, east, south, west                                 |
+| Produce Direction Parameter | $[0,3]$            | north, east, south, west                                 |
+| Produce Type Parameter      | $[0,6]$            | resource, base, barrack, worker, light, heavy, ranged    |
+| Relative Attack Position    | $[0,a_r^2 - 1]$    | the relative location of the unit that will be attacked  |
+
 
 ## Evaluation