wandb · tcapelle · Jul 5, 2023 · Jul 5, 2023 · Jul 5, 2023 · Jul 6, 2023
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,2 @@
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
@@ -25,3 +25,4 @@ __pycache__/
 *$py.class
 
 .idea/
+/**/Tiny*
diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb
@@ -0,0 +1,248 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "dbfb9335",
+   "metadata": {},
+   "source": [
+    "# Introduction to W&B\n",
+    "\n",
+    "We will add `wandb` to sprite classification model training, so that we can track and visualize important metrics, gain insights into our model's behavior and make informed decisions for model improvements. We will also see how to compare and analyze different experiments, collaborate with team members, and reproduce results effectively."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e9ba792c-2baa-4c19-a132-2ed82a759e79",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "from pathlib import Path\n",
+    "from types import SimpleNamespace\n",
+    "from tqdm.auto import tqdm\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "from torch.optim import Adam\n",
+    "from utilities import get_dataloaders\n",
+    "\n",
+    "import wandb"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2e0bfcc9",
+   "metadata": {},
+   "source": [
+    "### Sprite classification\n",
+    "\n",
+    "We will build a simple model to classify sprites. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d51a9f7f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "INPUT_SIZE = 3 * 16 * 16\n",
+    "OUTPUT_SIZE = 5\n",
+    "HIDDEN_SIZE = 256\n",
+    "NUM_WORKERS = 2\n",
+    "CLASSES = [\"hero\", \"non-hero\", \"food\", \"spell\", \"side-facing\"]\n",
+    "DATA_DIR = Path('./data/')\n",
+    "DEVICE = torch.device(\"cuda\" if torch.cuda.is_available()  else \"cpu\")\n",
+    "\n",
+    "def get_model(dropout):\n",
+    "    \"Simple MLP with Dropout\"\n",
+    "    return nn.Sequential(\n",
+    "        nn.Flatten(),\n",
+    "        nn.Linear(INPUT_SIZE, HIDDEN_SIZE),\n",
+    "        nn.BatchNorm1d(HIDDEN_SIZE),\n",
+    "        nn.ReLU(),\n",
+    "        nn.Dropout(dropout),\n",
+    "        nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE)\n",
+    "    ).to(DEVICE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f33f739c-d7ef-4954-ae87-d5bdd6bf25ee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Let's define a config object to store our hyperparameters\n",
+    "config = SimpleNamespace(\n",
+    "    epochs = 2,\n",
+    "    batch_size = 128,\n",
+    "    lr = 1e-5,\n",
+    "    dropout = 0.5,\n",
+    "    slice_size = 10_000,\n",
+    "    valid_pct = 0.2,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e5492ebb-2dfa-44ce-af6c-24655e45a2ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_model(config):\n",
+    "    \"Train a model with a given config\"\n",
+    "    # Start a wandb run\n",
+    "    wandb.init(\n",
+    "        project=\"dlai-intro\",\n",
+    "        config=config,\n",
+    "    )\n",
+    "    # Get the data\n",
+    "    train_dl, valid_dl = get_dataloaders(DATA_DIR, \n",
+    "                                         config.batch_size, \n",
+    "                                         config.slice_size, \n",
+    "                                         config.valid_pct)\n",
+    "    n_steps_per_epoch = math.ceil(len(train_dl.dataset) / config.batch_size)\n",
+    "\n",
+    "    # A simple MLP model\n",
+    "    model = get_model(config.dropout)\n",
+    "\n",
+    "    # Make the loss and optimizer\n",
+    "    loss_func = nn.CrossEntropyLoss()\n",
+    "    optimizer = Adam(model.parameters(), lr=config.lr)\n",
+    "\n",
+    "    example_ct = 0\n",
+    "\n",
+    "    for epoch in tqdm(range(config.epochs), total=config.epochs):\n",
+    "        model.train()\n",
+    "\n",
+    "        for step, (images, labels) in enumerate(train_dl):\n",
+    "            images, labels = images.to(DEVICE), labels.to(DEVICE)\n",
+    "\n",
+    "            outputs = model(images)\n",
+    "            train_loss = loss_func(outputs, labels)\n",
+    "            optimizer.zero_grad()\n",
+    "            train_loss.backward()\n",
+    "            optimizer.step()\n",
+    "\n",
+    "            example_ct += len(images)\n",
+    "            metrics = {\n",
+    "                \"train/train_loss\": train_loss,\n",
+    "                \"train/epoch\": epoch + 1,\n",
+    "                \"train/example_ct\": example_ct\n",
+    "            }\n",
+    "            # log training metrics to wandb\n",
+    "            wandb.log(metrics)\n",
+    "            \n",
+    "        # Compute validation metrics, log images on last epoch\n",
+    "        val_loss, accuracy = validate_model(model, valid_dl, loss_func)\n",
+    "        # Compute train and validation metrics\n",
+    "        val_metrics = {\n",
+    "            \"val/val_loss\": val_loss,\n",
+    "            \"val/val_accuracy\": accuracy\n",
+    "        }\n",
+    "        # log validation metrics to wandb\n",
+    "        wandb.log(val_metrics)\n",
+    "    \n",
+    "    wandb.finish()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8401cf96",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def validate_model(model, valid_dl, loss_func):\n",
+    "    \"Compute the performance of the model on the validation dataset\"\n",
+    "    model.eval()\n",
+    "    val_loss = 0.0\n",
+    "    correct = 0\n",
+    "\n",
+    "    with torch.inference_mode():\n",
+    "        for i, (images, labels) in enumerate(valid_dl):\n",
+    "            images, labels = images.to(DEVICE), labels.to(DEVICE)\n",
+    "\n",
+    "            # Forward pass\n",
+    "            outputs = model(images)\n",
+    "            val_loss += loss_func(outputs, labels) * labels.size(0)\n",
+    "\n",
+    "            # Compute accuracy and accumulate\n",
+    "            _, predicted = torch.max(outputs.data, 1)\n",
+    "            correct += (predicted == labels).sum().item()\n",
+    "            \n",
+    "    return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c4cac7d2",
+   "metadata": {},
+   "source": [
+    "### W&B account\n",
+    "[Sign up](https://wandb.ai/site) for a free account at https://wandb.ai/site and then login to your wandb account to store the results of your experiments and use advanced W&B features. You can also continue to learn in anonymous mode. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "803c37e2-7ff5-46a6-afb7-b80cb69f7501",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wandb.login(anonymous=\"allow\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b3df2485",
+   "metadata": {},
+   "source": [
+    "### Train model\n",
+    "Let's train the model with default config and check how it's doing in W&B. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_model(config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2e7c186f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		*.npy filter=lfs diff=lfs merge=lfs -text
		*.pth filter=lfs diff=lfs merge=lfs -text
Original file line number	Diff line number	Diff line change
Expand Up		@@ -25,3 +25,4 @@ __pycache__/
		*$py.class

		.idea/
		/*/Tiny