From f1373a38fff35b4eaaad4d1f5261f43b04e9a37b Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Wed, 5 Jul 2023 18:13:54 +0200
Subject: [PATCH 01/43] add code

---
 dlai/00_intro.ipynb              | 577 ++++++++++++++++++++++++
 dlai/01_diffusion_training.ipynb | 344 +++++++++++++++
 dlai/02_diffusion_sampling.ipynb | 735 +++++++++++++++++++++++++++++++
 dlai/03_llm_eval.ipynb           | 364 +++++++++++++++
 dlai/README.md                   |  13 +
 dlai/requirements.txt            |   7 +
 dlai/utilities.py                | 376 ++++++++++++++++
 7 files changed, 2416 insertions(+)
 create mode 100644 dlai/00_intro.ipynb
 create mode 100644 dlai/01_diffusion_training.ipynb
 create mode 100644 dlai/02_diffusion_sampling.ipynb
 create mode 100644 dlai/03_llm_eval.ipynb
 create mode 100644 dlai/README.md
 create mode 100644 dlai/requirements.txt
 create mode 100644 dlai/utilities.py

diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb
new file mode 100644
index 00000000..1cc2ecb6
--- /dev/null
+++ b/dlai/00_intro.ipynb
@@ -0,0 +1,577 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "e9ba792c-2baa-4c19-a132-2ed82a759e79",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "from types import SimpleNamespace\n",
+    "\n",
+    "import wandb\n",
+    "from tqdm.auto import tqdm\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "from torch.optim import Adam\n",
+    "from torch.utils.data import DataLoader, Subset\n",
+    "\n",
+    "from utilities import *\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "d51a9f7f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Constants\n",
+    "INPUT_SIZE = 3 * 16 * 16\n",
+    "OUTPUT_SIZE = 5\n",
+    "HIDDEN_SIZE = 256\n",
+    "NUM_WORKERS = 2\n",
+    "CLASSES = [\"hero\", \"non-hero\", \"food\", \"spell\", \"side-facing\"]\n",
+    "\n",
+    "# Device: prefer CUDA, then Apple-silicon MPS, and fall back to CPU\n",
+    "_backend = \"cuda\" if torch.cuda.is_available() else \"mps\" if torch.backends.mps.is_available() else \"cpu\"\n",
+    "device = torch.device(_backend)\n",
+    "\n",
+    "data_dir = './data/'\n",
+    "\n",
+    "def get_dataloaders(batch_size, slice_size=None, valid_pct=0.2):\n",
+    "    dataset = CustomDataset.from_np(data_dir + \"sprites_1788_16x16.npy\", data_dir + \"sprite_labels_nc_1788_16x16.npy\", argmax=True)\n",
+    "\n",
+    "    if slice_size:\n",
+    "        dataset = dataset.subset(slice_size)\n",
+    "\n",
+    "    train_ds, valid_ds = dataset.split(valid_pct)\n",
+    "\n",
+    "    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=1)    \n",
+    "    valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False, num_workers=1)\n",
+    "\n",
+    "    return train_dl, valid_dl\n",
+    "\n",
+    "def get_model(dropout):\n",
+    "    return nn.Sequential(\n",
+    "        nn.Flatten(),\n",
+    "        nn.Linear(INPUT_SIZE, HIDDEN_SIZE),\n",
+    "        nn.BatchNorm1d(HIDDEN_SIZE),\n",
+    "        nn.ReLU(),\n",
+    "        nn.Dropout(dropout),\n",
+    "        nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE)\n",
+    "    ).to(device)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "8700b5fa",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sprite shape: (89400, 16, 16, 3)\n",
+      "labels shape: (89400,)\n",
+      "sprite shape: (1000, 16, 16, 3)\n",
+      "labels shape: (1000,)\n"
+     ]
+    }
+   ],
+   "source": [
+    "train_dl, valid_dl = get_dataloaders(128, slice_size=1000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "8401cf96",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def validate_model(model, valid_dl, loss_func, log_images=False, batch_idx=0):\n",
+    "    \"Compute the performance of the model on the validation dataset and log a wandb.Table\"\n",
+    "    model.eval()\n",
+    "    val_loss = 0.0\n",
+    "    correct = 0\n",
+    "\n",
+    "    with torch.inference_mode():\n",
+    "        for i, (images, labels) in enumerate(valid_dl):\n",
+    "            images, labels = images.to(device), labels.to(device)\n",
+    "\n",
+    "            # Forward pass\n",
+    "            outputs = model(images)\n",
+    "            val_loss += loss_func(outputs, labels) * labels.size(0)\n",
+    "\n",
+    "            # Compute accuracy and accumulate\n",
+    "            _, predicted = torch.max(outputs.data, 1)\n",
+    "            correct += (predicted == labels).sum().item()\n",
+    "\n",
+    "            # Log one batch of images to the dashboard, always same batch_idx.\n",
+    "            if i == batch_idx and log_images:\n",
+    "                log_image_predictions_table(images, predicted, labels, outputs.softmax(dim=1))\n",
+    "\n",
+    "    return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n",
+    "\n",
+    "\n",
+    "def log_image_predictions_table(images, predicted, labels, probs):\n",
+    "    \"Create a wandb Table to log images, labels, and predictions\"\n",
+    "    table = wandb.Table(columns=[\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)])\n",
+    "    \n",
+    "    for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n",
+    "        table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n",
+    "    \n",
+    "    wandb.log({\"predictions_table\": table}, commit=False)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "e5492ebb-2dfa-44ce-af6c-24655e45a2ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_model(config):\n",
+    "    \"Train a model with a given config\"\n",
+    "    wandb.init(\n",
+    "        project=\"deeplearningai-intro\",\n",
+    "        config=config\n",
+    "    )\n",
+    "\n",
+    "    # Get the data\n",
+    "    train_dl, valid_dl = get_dataloaders(config.batch_size, config.slice_size, config.valid_pct)\n",
+    "    n_steps_per_epoch = math.ceil(len(train_dl.dataset) / config.batch_size)\n",
+    "\n",
+    "    # A simple MLP model\n",
+    "    model = get_model(config.dropout)\n",
+    "\n",
+    "    # Make the loss and optimizer\n",
+    "    loss_func = nn.CrossEntropyLoss()\n",
+    "    optimizer = Adam(model.parameters(), lr=config.lr)\n",
+    "\n",
+    "    example_ct = 0\n",
+    "\n",
+    "    for epoch in tqdm(range(config.epochs), total=config.epochs):\n",
+    "        model.train()\n",
+    "\n",
+    "        for step, (images, labels) in enumerate(train_dl):\n",
+    "            images, labels = images.to(device), labels.to(device)\n",
+    "\n",
+    "            outputs = model(images)\n",
+    "            train_loss = loss_func(outputs, labels)\n",
+    "            optimizer.zero_grad()\n",
+    "            train_loss.backward()\n",
+    "            optimizer.step()\n",
+    "\n",
+    "            example_ct += len(images)\n",
+    "            metrics = {\n",
+    "                \"train/train_loss\": train_loss,\n",
+    "                \"train/epoch\": (step + 1 + (n_steps_per_epoch * epoch)) / n_steps_per_epoch,\n",
+    "                \"train/example_ct\": example_ct\n",
+    "            }\n",
+    "\n",
+    "            if step + 1 < n_steps_per_epoch:\n",
+    "                # Log train metrics to wandb \n",
+    "                wandb.log(metrics)\n",
+    "                \n",
+    "        val_loss, accuracy = validate_model(model, valid_dl, loss_func, log_images=(epoch == (config.epochs - 1)))\n",
+    "\n",
+    "        # Log train and validation metrics to wandb\n",
+    "        val_metrics = {\n",
+    "            \"val/val_loss\": val_loss,\n",
+    "            \"val/val_accuracy\": accuracy\n",
+    "        }\n",
+    "        wandb.log({**metrics, **val_metrics})\n",
+    "\n",
+    "    # If you had a test set, this is how you could log it as a Summary metric\n",
+    "    wandb.run.summary['test_accuracy'] = 0.8\n",
+    "\n",
+    "    wandb.finish()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "f33f739c-d7ef-4954-ae87-d5bdd6bf25ee",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = SimpleNamespace(\n",
+    "    epochs = 3,\n",
+    "    batch_size = 128,\n",
+    "    lr = 1e-3,\n",
+    "    dropout = 0.1,\n",
+    "    slice_size = 1000,\n",
+    "    valid_pct = 0.2,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.4"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/Users/tcapelle/work/dlai/wandb/run-20230705_144549-dg3tar8b</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b' target=\"_blank\">morning-jazz-7</a></strong> to <a href='https://wandb.ai/capecape/deeplearningai-intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/capecape/deeplearningai-intro' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sprite shape: (89400, 16, 16, 3)\n",
+      "labels shape: (89400,)\n",
+      "sprite shape: (1000, 16, 16, 3)\n",
+      "labels shape: (1000,)\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c2a535bd5a0d481e9916ab5f71edbf41",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/3 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "70d7c9e0e2fb4edd969b284271d72c6f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(Label(value='0.161 MB of 0.161 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<style>\n",
+       "    table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
+       "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
+       "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
+       "    </style>\n",
+       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇▇██</td></tr><tr><td>train/example_ct</td><td>▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇███</td></tr><tr><td>train/train_loss</td><td>█▆▅▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁▆█</td></tr><tr><td>val/val_loss</td><td>█▃▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>2400</td></tr><tr><td>train/train_loss</td><td>0.21717</td></tr><tr><td>val/val_accuracy</td><td>0.92</td></tr><tr><td>val/val_loss</td><td>0.32078</td></tr></table><br/></div></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run <strong style=\"color:#cdcd00\">morning-jazz-7</strong> at: <a href='https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b</a><br/>Synced 7 W&B file(s), 1 media file(s), 126 artifact file(s) and 2 other file(s)"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Find logs at: <code>./wandb/run-20230705_144549-dg3tar8b/logs</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "train_model(config)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "e9ecf01d",
+   "metadata": {},
+   "source": [
+    "Let's try with another value of dropout:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "4f40520a-66f8-4415-9e36-174dda06aca0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.4"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/Users/tcapelle/work/dlai/wandb/run-20230705_144416-iysb84lz</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz' target=\"_blank\">icy-water-6</a></strong> to <a href='https://wandb.ai/capecape/deeplearningai-intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/capecape/deeplearningai-intro' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sprite shape: (89400, 16, 16, 3)\n",
+      "labels shape: (89400,)\n",
+      "sprite shape: (1000, 16, 16, 3)\n",
+      "labels shape: (1000,)\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e576d9f253b6400d80f8022dafbdd326",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/3 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7bd6b3a334c94ce58afd7c25e10d8d5a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(Label(value='0.161 MB of 0.161 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<style>\n",
+       "    table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
+       "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
+       "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
+       "    </style>\n",
+       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇▇██</td></tr><tr><td>train/example_ct</td><td>▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇███</td></tr><tr><td>train/train_loss</td><td>█▆▅▄▄▃▃▂▂▂▂▁▂▂▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁██</td></tr><tr><td>val/val_loss</td><td>█▂▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>2400</td></tr><tr><td>train/train_loss</td><td>0.27209</td></tr><tr><td>val/val_accuracy</td><td>0.92</td></tr><tr><td>val/val_loss</td><td>0.32183</td></tr></table><br/></div></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run <strong style=\"color:#cdcd00\">icy-water-6</strong> at: <a href='https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz</a><br/>Synced 7 W&B file(s), 1 media file(s), 126 artifact file(s) and 2 other file(s)"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Find logs at: <code>./wandb/run-20230705_144416-iysb84lz/logs</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "config.dropout = 0.5\n",
+    "train_model(config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bba77c03-fcd2-43ef-9a11-8cebef617c23",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
new file mode 100644
index 00000000..46530b6b
--- /dev/null
+++ b/dlai/01_diffusion_training.ipynb
@@ -0,0 +1,344 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "958524a2-cb56-439e-850e-032dd10478f2",
+   "metadata": {},
+   "source": [
+    "# Training a Diffusion Model with W&B\n",
+    "\n",
+    "In this notebook we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook and add:\n",
+    "- Logging of the training loss\n",
+    "- Sampling from the model during training and logging the samples to W&B\n",
+    "- Saving the model checkpoints to W&B"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "700e687c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from tqdm.notebook import tqdm\n",
+    "import torch\n",
+    "import torch.nn.functional as F\n",
+    "from torch.utils.data import DataLoader\n",
+    "import numpy as np\n",
+    "from utilities import *\n",
+    "\n",
+    "import wandb"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "7c0d229a",
+   "metadata": {},
+   "source": [
+    "# Setting Things Up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "54c3a942",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# hyperparameters\n",
+    "num_samples = 32\n",
+    "\n",
+    "# diffusion hyperparameters\n",
+    "timesteps = 500\n",
+    "beta1 = 1e-4\n",
+    "beta2 = 0.02\n",
+    "\n",
+    "# network hyperparameters\n",
+    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n",
+    "# device = \"mps\"\n",
+    "n_feat = 64 # 64 hidden dimension feature\n",
+    "n_cfeat = 5 # context vector is of size 5\n",
+    "height = 16 # 16x16 image\n",
+    "data_dir = './data/'\n",
+    "save_dir = './data/weights/'\n",
+    "if not os.path.exists(save_dir):\n",
+    "    os.mkdir(save_dir)\n",
+    "\n",
+    "# training hyperparameters\n",
+    "batch_size = 100\n",
+    "n_epoch = 32\n",
+    "lrate=1e-3\n",
+    "\n",
+    "# we are storing the parameters in a dictionary to be logged to wandb\n",
+    "config = dict(\n",
+    "    num_samples=num_samples,\n",
+    "    timesteps=timesteps,\n",
+    "    beta1=beta1,\n",
+    "    beta2=beta2,\n",
+    "    device=device,\n",
+    "    n_feat=n_feat,\n",
+    "    n_cfeat=n_cfeat,\n",
+    "    height=height,\n",
+    "    save_dir=save_dir,\n",
+    "    batch_size=batch_size,\n",
+    "    n_epoch=n_epoch,\n",
+    "    lrate=lrate,\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "bb43f98f",
+   "metadata": {},
+   "source": [
+    "All this is the same as the previous notebook, except for the addition of the context vector size n_cfeat. We will use this to condition the diffusion model on a context vector."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "a705d0a8",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# construct DDPM noise schedule\n",
+    "b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1\n",
+    "a_t = 1 - b_t\n",
+    "ab_t = torch.cumsum(a_t.log(), dim=0).exp()    \n",
+    "ab_t[0] = 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "6bc9001e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# construct model\n",
+    "nn_model = ContextUnet(in_channels=3, n_feat=n_feat, n_cfeat=n_cfeat, height=height).to(device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "76c63b85",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sprite shape: (89400, 16, 16, 3)\n",
+      "labels shape: (89400, 5)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# load dataset and construct optimizer\n",
+    "dataset = CustomDataset.from_np(data_dir + \"sprites_1788_16x16.npy\", data_dir + \"sprite_labels_nc_1788_16x16.npy\")\n",
+    "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=1)\n",
+    "optim = torch.optim.Adam(nn_model.parameters(), lr=lrate, eps=1e-5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eb13689d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# helper function: forward diffusion, x_t = sqrt(ab_t) * x_0 + sqrt(1 - ab_t) * noise\n",
+    "def perturb_input(x, t, noise):\n",
+    "    return ab_t.sqrt()[t, None, None, None] * x + (1 - ab_t[t, None, None, None]).sqrt() * noise"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "fe8eb277",
+   "metadata": {},
+   "source": [
+    "## Sampling"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "45d92c52-8a11-450c-bc78-ffa221af2fa3",
+   "metadata": {},
+   "source": [
+    "We will need to instrument the sampler to have telemetry on the generated images while training!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b0f5bed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# helper function; removes the predicted noise (but adds some noise back in to avoid collapse)\n",
+    "def denoise_add_noise(x, t, pred_noise, z=None):\n",
+    "    if z is None:\n",
+    "        z = torch.randn_like(x)\n",
+    "    noise = b_t.sqrt()[t] * z\n",
+    "    mean = (x - pred_noise * ((1 - a_t[t]) / (1 - ab_t[t]).sqrt())) / a_t[t].sqrt()\n",
+    "    return mean + noise"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16085a65",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sample with context using standard algorithm\n",
+    "# we change the original algorithm so the initial noise (samples) and the context are passed in explicitly\n",
+    "@torch.no_grad()\n",
+    "def sample_ddpm_context(samples, context, save_rate=20):\n",
+    "    # array to keep track of generated steps for plotting\n",
+    "    intermediate = [] \n",
+    "    for i in range(timesteps, 0, -1):\n",
+    "        # reshape time tensor\n",
+    "        t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n",
+    "\n",
+    "        # sample some random noise to inject back in. For i = 1, don't add back in noise\n",
+    "        z = torch.randn_like(samples) if i > 1 else 0\n",
+    "\n",
+    "        eps = nn_model(samples, t, c=context)    # predict noise e_(x_t,t, ctx)\n",
+    "        samples = denoise_add_noise(samples, i, eps, z)\n",
+    "        if i % save_rate==0 or i==timesteps or i<8:\n",
+    "            intermediate.append(samples.detach().cpu().numpy())\n",
+    "\n",
+    "    intermediate = np.stack(intermediate)\n",
+    "    return samples.clip(-1, 1), intermediate"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "d9ed46d7",
+   "metadata": {},
+   "source": [
+    "# Training"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c",
+   "metadata": {},
+   "source": [
+    "We choose a fixed context vector with 6 samples for each of the first three classes and 7 for each of the last two (32 in total), so we know what to expect in the workspace."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d88afdba",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Noise vector\n",
+    "# x_T ~ N(0, 1), sample initial noise\n",
+    "noises = torch.randn(num_samples, 3, height, height).to(device)  \n",
+    "\n",
+    "# A fixed context vector to sample from\n",
+    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4]), 5).to(device=device).float()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f5f4af69",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# same code as before, added comments on the extra W&B instrumentation lines\n",
+    "# create a wandb run\n",
+    "run = wandb.init(project=\"dlai_diffusion\", job_type=\"train_conditional\", config=config)\n",
+    "\n",
+    "for ep in range(n_epoch):\n",
+    "    # set into train mode\n",
+    "    nn_model.train()\n",
+    "    optim.param_groups[0]['lr'] = lrate*(1-ep/n_epoch)\n",
+    "    \n",
+    "    pbar = tqdm(dataloader, leave=False)\n",
+    "    for x, c in pbar:   # x: images  c: context\n",
+    "        optim.zero_grad()\n",
+    "        x = x.to(device)\n",
+    "        c = c.to(x)   \n",
+    "        context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(device)\n",
+    "        c = c * context_mask.unsqueeze(-1)        \n",
+    "        noise = torch.randn_like(x)\n",
+    "        t = torch.randint(1, timesteps + 1, (x.shape[0],)).to(device) \n",
+    "        x_pert = perturb_input(x, t, noise)      \n",
+    "        pred_noise = nn_model(x_pert, t / timesteps, c=c)      \n",
+    "        loss = F.mse_loss(pred_noise, noise)\n",
+    "        loss.backward()    \n",
+    "        optim.step()\n",
+    "\n",
+    "        # we log the relevant metrics to the workspace\n",
+    "        wandb.log({\"loss\": loss.item(),\n",
+    "                   \"lr\": optim.param_groups[0]['lr'],\n",
+    "                   \"epoch\": ep})\n",
+    "\n",
+    "    # save model periodically\n",
+    "    if ep%4==0 or ep == int(n_epoch-1):\n",
+    "        nn_model.eval()\n",
+    "        ckpt_file = save_dir + f\"context_model_{ep}.pth\"\n",
+    "        torch.save(nn_model.state_dict(), ckpt_file)\n",
+    "\n",
+    "        # save model to wandb as an Artifact\n",
+    "        artifact_name = f\"{wandb.run.id}_context_model\"\n",
+    "        at = wandb.Artifact(artifact_name, type=\"model\", \n",
+    "                            metadata={\"loss\":loss.item(), \"epoch\":ep})\n",
+    "        at.add_file(ckpt_file)\n",
+    "        wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n",
+    "\n",
+    "        # sample the model and log the images to W&B\n",
+    "        samples, _ = sample_ddpm_context(noises, ctx_vector[:num_samples])\n",
+    "        wandb.log({\"train_samples\": [wandb.Image(img) for img in samples.split(1)]})\n",
+    "\n",
+    "# finish W&B run\n",
+    "wandb.finish()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
new file mode 100644
index 00000000..23cd1f26
--- /dev/null
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -0,0 +1,735 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "958524a2-cb56-439e-850e-032dd10478f2",
+   "metadata": {},
+   "source": [
+    "# Sampling from a diffusion model\n",
+    "In this notebook we will sample from the previously trained diffusion model.\n",
+    "- We are going to compare the samples from DDPM and DDIM samplers\n",
+    "- Visualize mixing samples with conditional diffusion models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "700e687c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn.functional as F\n",
+    "import numpy as np\n",
+    "from utilities import *\n",
+    "\n",
+    "import wandb"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "7c0d229a",
+   "metadata": {},
+   "source": [
+    "# Setting Things Up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "54c3a942",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Wandb Params\n",
+    "project = \"debug_dlai\"\n",
+    "entity = \"capecape\"\n",
+    "\n",
+    "# ddpm sampler hyperparameters\n",
+    "timesteps = 500\n",
+    "beta1 = 1e-4\n",
+    "beta2 = 0.02\n",
+    "num_samples = 32\n",
+    "height = 16\n",
+    "ddim_n = 25\n",
+    "\n",
+    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n",
+    "\n",
+    "device = \"mps\"\n",
+    "\n",
+    "# we are storing the parameters in a dictionary to be logged to wandb\n",
+    "config = dict(\n",
+    "    timesteps=timesteps,\n",
+    "    beta1=beta1,\n",
+    "    beta2=beta2,\n",
+    "    num_samples=num_samples,\n",
+    "    height=height,\n",
+    "    ddim_n=ddim_n,\n",
+    "    device=device,\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "bb43f98f",
+   "metadata": {},
+   "source": [
+    "We will load the model from a wandb.Artifact and set up the sampling loop."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "8ab66255",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_model(model_artifact_name):\n",
+    "    \"Load the model from wandb artifacts\"\n",
+    "    api = wandb.Api()\n",
+    "    artifact = api.artifact(model_artifact_name, type=\"model\")\n",
+    "    model_path = artifact.download()\n",
+    "\n",
+    "    # recover model info from the registry\n",
+    "    producer_run = artifact.logged_by()\n",
+    "\n",
+    "    # load the weights dictionary\n",
+    "    model_weights = torch.load(model_path + f\"/context_model_31.pth\", map_location=\"cpu\")\n",
+    "\n",
+    "    # create the model\n",
+    "    model = ContextUnet(in_channels=3, \n",
+    "                        n_feat=producer_run.config[\"n_feat\"], \n",
+    "                        n_cfeat=producer_run.config[\"n_cfeat\"], \n",
+    "                        height=producer_run.config[\"height\"])\n",
+    "    \n",
+    "    # load the weights into the model\n",
+    "    model.load_state_dict(model_weights)\n",
+    "\n",
+    "    # set the model to eval mode\n",
+    "    model.eval()\n",
+    "    return model.to(device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "b47633e2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m:   1 of 1 files downloaded.  \n"
+     ]
+    }
+   ],
+   "source": [
+    "nn_model = load_model('capecape/dlai_diffusion/w1r7jpji_context_model:v8')"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "fe8eb277",
+   "metadata": {},
+   "source": [
+    "## Sampling"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "45d92c52-8a11-450c-bc78-ffa221af2fa3",
+   "metadata": {},
+   "source": [
+    "We will sample and log the generated samples to wandb."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "f6f479d1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# construct DDPM noise schedule\n",
+    "b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1\n",
+    "a_t = 1 - b_t\n",
+    "ab_t = torch.cumsum(a_t.log(), dim=0).exp()    \n",
+    "ab_t[0] = 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "8b0f5bed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# helper function; removes the predicted noise (but adds some noise back in to avoid collapse)\n",
+    "def denoise_add_noise(x, t, pred_noise, z=None):\n",
+    "    if z is None:\n",
+    "        z = torch.randn_like(x)\n",
+    "    noise = b_t.sqrt()[t] * z\n",
+    "    mean = (x - pred_noise * ((1 - a_t[t]) / (1 - ab_t[t]).sqrt())) / a_t[t].sqrt()\n",
+    "    return mean + noise"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "16085a65",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sample with context using standard algorithm\n",
+    "# we make a change to the original algorithm to allow for context and passing a noise tensor (samples)\n",
+    "@torch.no_grad()\n",
+    "def sample_ddpm_context(samples, context, save_rate=20):\n",
+    "    # array to keep track of generated steps for plotting\n",
+    "    intermediate = [] \n",
+    "    for i in range(timesteps, 0, -1):\n",
+    "        # reshape time tensor\n",
+    "        t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n",
+    "\n",
+    "        # sample some random noise to inject back in. For i = 1, don't add back in noise\n",
+    "        z = torch.randn_like(samples) if i > 1 else 0\n",
+    "\n",
+    "        eps = nn_model(samples, t, c=context)    # predict noise e_(x_t,t, ctx)\n",
+    "        samples = denoise_add_noise(samples, i, eps, z)\n",
+    "        if i % save_rate==0 or i==timesteps or i<8:\n",
+    "            print(f'sampling timestep {i:3d}', end='\\r')\n",
+    "            intermediate.append(samples.detach().cpu().numpy())\n",
+    "\n",
+    "    intermediate = np.stack(intermediate)\n",
+    "    return samples.clip(-1, 1), intermediate"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c",
+   "metadata": {},
+   "source": [
+    "Let's define a set of noises and a context vector to condition on."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "d88afdba",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Noise vector\n",
+    "# x_T ~ N(0, 1), sample initial noise\n",
+    "noises = torch.randn(num_samples, 3, height, height).to(device)  \n",
+    "\n",
+    "# A fixed context vector to sample from\n",
+    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4]), 5).to(device=device).float()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "1cbf9ef8-619a-4052-a138-a88c0f0f8b0b",
+   "metadata": {},
+   "source": [
+    "Let's bring in the faster DDIM sampler from the diffusion course."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define sampling function for DDIM   \n",
+    "# removes the noise using ddim\n",
+    "def denoise_ddim(x, t, t_prev, pred_noise):\n",
+    "    ab = ab_t[t]\n",
+    "    ab_prev = ab_t[t_prev]\n",
+    "    \n",
+    "    x0_pred = ab_prev.sqrt() / ab.sqrt() * (x - (1 - ab).sqrt() * pred_noise)\n",
+    "    dir_xt = (1 - ab_prev).sqrt() * pred_noise\n",
+    "\n",
+    "    return x0_pred + dir_xt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "5fdfc048-47f0-43b5-983e-da715e1ed562",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# fast sampling algorithm with context\n",
+    "@torch.no_grad()\n",
+    "def sample_ddim_context(samples, context, n=25): \n",
+    "    # array to keep track of generated steps for plotting\n",
+    "    intermediate = [] \n",
+    "    step_size = timesteps // n\n",
+    "    for i in range(timesteps, 0, -step_size):\n",
+    "        print(f'sampling timestep {i:3d}', end='\\r')\n",
+    "\n",
+    "        # reshape time tensor\n",
+    "        t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n",
+    "\n",
+    "        eps = nn_model(samples, t, c=context)    # predict noise e_(x_t,t)\n",
+    "        samples = denoise_ddim(samples, i, i - step_size, eps)\n",
+    "        intermediate.append(samples.detach().cpu().numpy())\n",
+    "\n",
+    "    intermediate = np.stack(intermediate)\n",
+    "    return samples.clip(-1, 1), intermediate"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "aee10774-ff79-4df7-9b2d-1908561c23e5",
+   "metadata": {},
+   "source": [
+    "Let's create a `wandb.Table` to store our generations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "table = wandb.Table(columns=[\"input_noise\", \"ddpm\", \"ddim\", \"class\"])"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "90b838be-8fa1-4c12-9c4f-e40dfacc08e1",
+   "metadata": {},
+   "source": [
+    "Let's compute DDPM samples as before."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "89e24210-4885-4559-92e1-db10566ef5ea",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sampling timestep   1\r"
+     ]
+    }
+   ],
+   "source": [
+    "ddpm_samples, _ = sample_ddpm_context(noises, ctx_vector)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "836584a1-26b5-45b1-98c9-0c45d639c8f9",
+   "metadata": {},
+   "source": [
+    "For DDIM we can control the step size by the `n` param:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "25b07c26-0ac2-428a-8351-34f8b7228074",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sampling timestep  20\r"
+     ]
+    }
+   ],
+   "source": [
+    "ddim_samples, _ = sample_ddim_context(noises, ctx_vector, n=ddim_n)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "af33d3c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def ctx_to_classes(ctx_vector):\n",
+    "    classes = \"hero,non-hero,food,spell,side-facing\".split(\",\")\n",
+    "    return [classes[i] for i in [ctx_vector[i].argmax().item() for i in range(ctx_vector.shape[0])]]"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "daea8275-0356-452e-a9f9-2824ef53f1ea",
+   "metadata": {},
+   "source": [
+    "Let's keep track of the sampling params in a dictionary"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "85be303d-0f0b-4df4-8c87-bd1bfb6145a2",
+   "metadata": {},
+   "source": [
+    "We can add the rows to the table one by one. We also cast the images to `wandb.Image` so they render correctly in the UI."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "481afea1-ae53-4b5b-a3db-1d49be0733a3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for noise, ddpm_sample, ddim_sample, c in zip(noises, ddpm_samples, ddim_samples, ctx_to_classes(ctx_vector)):\n",
+    "    table.add_data(wandb.Image(noise), \n",
+    "                   wandb.Image(ddpm_sample), \n",
+    "                   wandb.Image(ddim_sample),\n",
+    "                   c)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "987cee86-2db1-4a2a-9d14-f70c6248ecb9",
+   "metadata": {},
+   "source": [
+    "We log the table to W&B. We can also use `wandb.init` as a context manager; this way we ensure that the run is finished when exiting the manager."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "bbc7a2ca-ae05-4462-9ae3-82eb1a6dbc27",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.4"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/Users/tcapelle/work/dlai/wandb_diffusion/wandb/run-20230704_142746-xfz2uh0q</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q' target=\"_blank\">winter-feather-1</a></strong> to <a href='https://wandb.ai/capecape/debug_dlai' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/capecape/debug_dlai' target=\"_blank\">https://wandb.ai/capecape/debug_dlai</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q' target=\"_blank\">https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run <strong style=\"color:#cdcd00\">winter-feather-1</strong> at: <a href='https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q' target=\"_blank\">https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q</a><br/>Synced 6 W&B file(s), 1 media file(s), 97 artifact file(s) and 1 other file(s)"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Find logs at: <code>./wandb/run-20230704_142746-xfz2uh0q/logs</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "with wandb.init(project=project, entity=entity, job_type=\"samplers_battle\", config=config):\n",
+    "    wandb.log({\"samplers_tables\":table})"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "a974258a-55fb-43ef-9136-985ec85bc3fc",
+   "metadata": {},
+   "source": [
+    "## Mixing classes during sampling"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "68987e12-22d1-4c40-b0d2-b33f6397c34a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "sampling timestep   1\r"
+     ]
+    }
+   ],
+   "source": [
+    "ctx = torch.tensor([\n",
+    "    # hero, non-hero, food, spell, side-facing\n",
+    "    [1,0,0,0,0],      #human\n",
+    "    [1,0,0.6,0,0],    \n",
+    "    [0,0,0.6,0.4,0],  \n",
+    "    [1,0,0,0,1],  \n",
+    "    [1,1,0,0,0],\n",
+    "    [1,0,0,1,0]\n",
+    "]).float().to(device)\n",
+    "\n",
+    "# let's pass the same noise every time\n",
+    "samples = torch.cat([torch.randn(1, 3, height, height)]*6, axis=0).to(device)  \n",
+    "ddpm_samples, _ = sample_ddpm_context(samples, ctx)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "626ef616-dae4-4417-9219-d67ef0794e63",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hero_table = wandb.Table(columns=[\"generation\", \"hero\", \"non-hero\", \"food\", \"spell\", \"side-facing\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "dbeb2d18-feb9-4452-b368-3f7f03b1715d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for s, c in zip(ddpm_samples, ctx.cpu().numpy().tolist()):\n",
+    "    hero_table.add_data(wandb.Image(s), *c)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "8e196f2b-4d13-4cc2-a380-2f23530bee1c",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.4"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/Users/tcapelle/work/dlai/wandb_diffusion/wandb/run-20230704_142806-sv5fvps1</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1' target=\"_blank\">summer-spaceship-2</a></strong> to <a href='https://wandb.ai/capecape/debug_dlai' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/capecape/debug_dlai' target=\"_blank\">https://wandb.ai/capecape/debug_dlai</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1' target=\"_blank\">https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run <strong style=\"color:#cdcd00\">summer-spaceship-2</strong> at: <a href='https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1' target=\"_blank\">https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1</a><br/>Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Find logs at: <code>./wandb/run-20230704_142806-sv5fvps1/logs</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "with wandb.init(project=project, entity=entity, job_type=\"sampling_mix\", config=config):\n",
+    "    wandb.log({\"hero_table\":hero_table})"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/dlai/03_llm_eval.ipynb b/dlai/03_llm_eval.ipynb
new file mode 100644
index 00000000..0a030e22
--- /dev/null
+++ b/dlai/03_llm_eval.ipynb
@@ -0,0 +1,364 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "53c0d4d6-3d2b-45e5-90fa-ba7953496ec2",
+   "metadata": {},
+   "source": [
+    "# LLM Tracing with W&B\n",
+    "\n",
+    "## 1. Auto-logging\n",
+    "\n",
+    "In this section, we will call an OpenAI LLM to generate names for our game assets. We will use W&B autologging, which is also available for other popular LLMs and libraries like ... "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "98d52240-af93-4c87-a11e-309b23bdae9c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install wandb-addons, this will be added to wandb soon\n",
+    "# !git clone https://github.com/soumik12345/wandb-addons.git\n",
+    "# !pip install ./wandb-addons[prompts] openai wandb -qqq"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6512739b-fe35-4901-acb3-05df46b5ed9c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import random\n",
+    "import time\n",
+    "import datetime\n",
+    "\n",
+    "import openai\n",
+    "import tiktoken\n",
+    "\n",
+    "from pathlib import Path\n",
+    "from pprint import pprint\n",
+    "from getpass import getpass\n",
+    "\n",
+    "from rich.markdown import Markdown\n",
+    "import pandas as pd\n",
+    "from tenacity import (\n",
+    "    retry,\n",
+    "    stop_after_attempt,\n",
+    "    wait_random_exponential, # for exponential backoff\n",
+    ")  \n",
+    "import wandb\n",
+    "from wandb.integration.openai import autolog\n",
+    "from wandb_addons.prompts import Trace"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "autolog({\"project\":\"deeplearningai-llm\", \"job_type\": \"generation\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b2ab394b-295b-4cfa-aade-aa274003a56a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))\n",
+    "def completion_with_backoff(**kwargs):\n",
+    "    return openai.ChatCompletion.create(**kwargs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "076e62a1-188f-47e1-bda9-5e3619e7d4ba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "MODEL_NAME = \"gpt-3.5-turbo\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "736fe64f-5cca-4316-8842-588b948193de",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def generate_and_print(system_prompt, user_prompt, n=5):\n",
+    "    messages=[\n",
+    "            {\"role\": \"system\", \"content\": system_prompt},\n",
+    "            {\"role\": \"user\", \"content\": user_prompt},\n",
+    "        ]\n",
+    "    responses = completion_with_backoff(\n",
+    "        model=MODEL_NAME,\n",
+    "        messages=messages,\n",
+    "        n = n,\n",
+    "        )\n",
+    "    for response in responses.choices:\n",
+    "        generation = response.message.content\n",
+    "        display(Markdown(generation))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "197a256e-834f-42ee-8680-0e5cc53903cb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "openai.api_key = os.environ[\"OPENAI_API_KEY\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "690e6e0a-193b-41c8-86c4-526f8061dd94",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "system_prompt = \"\"\"You are a creative copywriter.\n",
+    "You're given a category of game asset, and your goal is to design a name of that asset.\n",
+    "The game is set in a fantasy world where everyone laughs and respects each other, while celebrating diversity.\"\"\"\n",
+    "user_prompt = \"hero\"\n",
+    "generate_and_print(system_prompt, user_prompt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8343121b-2d47-47d1-b343-ec2393b8f02f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "user_prompt = \"jewel\"\n",
+    "generate_and_print(system_prompt, user_prompt)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "16d6d513-389d-4c67-a942-a922bce6ff1a",
+   "metadata": {},
+   "source": [
+    "## 2. Using Tracer to log more complex chains\n",
+    "\n",
+    "How can we get more creative outputs? Let's design an LLM chain that will first randomly pick a fantasy world, and then generate character names. We will demonstrate how to use Tracer in such a scenario. You can also use our native integration with libraries like LangChain or LlamaIndex instead. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3c9fd404-51fd-44cf-b41e-b81dc589a4af",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "worlds = [\n",
+    "    \"a mystic medieval island inhabited by intelligent and funny frogs\",\n",
+    "    \"a modern castle sitting on top of a volcano in a faraway galaxy\",\n",
+    "    \"a digital world inhabited by friendly machine learning engineers\"\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d366dca-db12-4532-a98d-5b29fa8a0b71",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "random.choice(worlds)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0db1e20a-87a8-4386-9a8d-727db9569cd7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define your config\n",
+    "model_name = \"gpt-3.5-turbo\"\n",
+    "temperature = 0.7\n",
+    "system_message = \"\"\"You are a creative copywriter. \n",
+    "You're given a category of game asset, a fantasy world, and your goal is to design a name of that asset.\n",
+    "Provide the resulting name only, no additional description.\n",
+    "Single name, max 3 words output, remember!\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1a86f95e-ed0d-4989-8c1d-5b88cdac7999",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def run_creative_chain(query):\n",
+    "    # part 1 - a chain is started...\n",
+    "    start_time_ms = round(datetime.datetime.now().timestamp() * 1000)\n",
+    "\n",
+    "    root_span = Trace(\n",
+    "          name=\"MyCreativeChain\",\n",
+    "          kind=\"agent\",\n",
+    "          start_time_ms=start_time_ms,\n",
+    "          metadata={\"user\": \"student_1\"})\n",
+    "\n",
+    "    # part 2 - The chain calls into a child chain..\n",
+    "    chain_span = Trace(\n",
+    "          name=\"MyChain\",\n",
+    "          kind=\"chain\",\n",
+    "          start_time_ms=start_time_ms)\n",
+    "\n",
+    "    # add the Chain span as a child of the root\n",
+    "    root_span.add_child(chain_span)\n",
+    "\n",
+    "    # part 3 - your chain picks a fantasy world\n",
+    "    time.sleep(3)\n",
+    "    world = random.choice(worlds)\n",
+    "    expanded_prompt = f'Game asset category: {query}; fantasy world description: {world}'\n",
+    "    tool_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)\n",
+    "\n",
+    "    # create a Tool span \n",
+    "    tool_span = Trace(\n",
+    "          name=\"WorldPicker\",\n",
+    "          kind=\"tool\",\n",
+    "          status_code=\"success\",\n",
+    "          start_time_ms=start_time_ms,\n",
+    "          end_time_ms=tool_end_time_ms,\n",
+    "          inputs={\"input\": query},\n",
+    "          outputs={\"result\": expanded_prompt})\n",
+    "\n",
+    "    # add the Tool span as a child of the Chain span\n",
+    "    chain_span.add_child(tool_span)\n",
+    "\n",
+    "    # part 4 - the LLMChain calls an OpenAI LLM...\n",
+    "    messages=[\n",
+    "      {\"role\": \"system\", \"content\": system_message},\n",
+    "      {\"role\": \"user\", \"content\": expanded_prompt}\n",
+    "    ]\n",
+    "\n",
+    "    response = openai.ChatCompletion.create(model=model_name,\n",
+    "                                            messages=messages,\n",
+    "                                            temperature=temperature)   \n",
+    "\n",
+    "    llm_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)\n",
+    "    response_text = response[\"choices\"][0][\"message\"][\"content\"]\n",
+    "    token_usage = response[\"usage\"].to_dict()\n",
+    "\n",
+    "    llm_span = Trace(\n",
+    "          name=\"OpenAI\",\n",
+    "          kind=\"llm\",\n",
+    "          status_code=\"success\",\n",
+    "          metadata={\"temperature\":temperature,\n",
+    "                    \"token_usage\": token_usage, \n",
+    "                    \"model_name\":model_name},\n",
+    "          start_time_ms=tool_end_time_ms,\n",
+    "          end_time_ms=llm_end_time_ms,\n",
+    "          inputs={\"system_prompt\":system_message, \"query\":expanded_prompt},\n",
+    "          outputs={\"response\": response_text},\n",
+    "          )\n",
+    "\n",
+    "    # add the LLM span as a child of the Chain span...\n",
+    "    chain_span.add_child(llm_span)\n",
+    "\n",
+    "    # add the inputs and outputs to the Chain span\n",
+    "    chain_span.add_inputs_and_outputs(\n",
+    "          inputs={\"query\":query},\n",
+    "          outputs={\"response\": response_text})\n",
+    "\n",
+    "    # update the Chain span's end time\n",
+    "    chain_span._span.end_time_ms = llm_end_time_ms\n",
+    "\n",
+    "    # part 5 - the final results from the tool are added \n",
+    "    root_span.add_inputs_and_outputs(inputs={\"query\": query},\n",
+    "                                     outputs={\"result\": response_text})\n",
+    "    root_span._span.end_time_ms = llm_end_time_ms\n",
+    "\n",
+    "    # part 6 - log all spans to W&B by logging the root span\n",
+    "    root_span.log(name=\"creative_trace\")\n",
+    "    print(f\"Result: {response_text}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f8500843-6d4b-4fc6-93b9-4cadf5813e4a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "openai.api_key = os.environ[\"OPENAI_API_KEY\"]\n",
+    "\n",
+    "run_creative_chain(\"hero\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "538d7bf3-4ae1-4b57-8a96-a34ea0614ec3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "run_creative_chain(\"jewel\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1ccc075f-32bf-4451-b7ad-ab2a49cc86b6",
+   "metadata": {},
+   "source": [
+    "## Langchain agent\n",
+    "\n",
+    "WIP: add langchain agent - adding names and evaluating if they are good. Wrap a previous function as a langchain tool. \n",
+    "\n",
+    "Demonstrate W&B Tracer autologging. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45de1fb0-3630-4673-8ac0-0dffe0a52071",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82c7ab14-4335-4649-95b4-35fb8023af1d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/dlai/README.md b/dlai/README.md
new file mode 100644
index 00000000..a20f4a8d
--- /dev/null
+++ b/dlai/README.md
@@ -0,0 +1,13 @@
+[![](https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-gradient.svg)](https://wandb.ai/capecape/dlai_diffusion)
+
+# DLAI with W&B 😎
+
+We instrument various notebooks from the generative AI course with W&B to track metrics, hyperparameters, and artifacts.
+
+- [00_intro](00_intro.ipynb) In this notebook we learn about using Weights & Biases! We train a simple classifier on the Sprites dataset and log the results to W&B.
+- [01_diffusion_training](01_diffusion_training.ipynb) In this notebook we train a diffusion model to generate images from the Sprites dataset. We log the training metrics to W&B. We sample from the model and log the images to W&B.
+- [02_diffusion_sampling](02_diffusion_sampling.ipynb) In this notebook we sample from the trained model and log the images to W&B. We compare different sampling methods and log the results.
+- [03 LLM evaluation and debugging](03_llm_eval.ipynb) In this notebook we generate character names using LLMs and use W&B autologgers and Tracer to evaluate and debug our generations.
+- [04 WIP]() We are planning to add a CPU-based LLM finetuning notebook with a small LLM finetuned for generating names
+
+The W&B dashboard: https://wandb.ai/capecape/dlai_diffusion
\ No newline at end of file
diff --git a/dlai/requirements.txt b/dlai/requirements.txt
new file mode 100644
index 00000000..886e2b28
--- /dev/null
+++ b/dlai/requirements.txt
@@ -0,0 +1,7 @@
+torch>=2.0
+torchvision>=0.15
+matplotlib
+pandas
+numpy
+wandb
+tqdm
\ No newline at end of file
diff --git a/dlai/utilities.py b/dlai/utilities.py
new file mode 100644
index 00000000..29ce5bc9
--- /dev/null
+++ b/dlai/utilities.py
@@ -0,0 +1,376 @@
+import os
+import random
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+import torch.nn as nn
+import torchvision.transforms as transforms
+from matplotlib.animation import FuncAnimation, PillowWriter
+from PIL import Image
+from torch.utils.data import Dataset
+from torchvision.utils import make_grid, save_image
+
+
+def _fig_bounds(x):
+    r = x//32
+    return min(5, max(1,r))
+
+def show_image(im, ax=None, figsize=None, title=None, **kwargs):
+    "Show a PIL or PyTorch image on `ax`."
+    cmap=None
+    # Handle pytorch axis order
+    if isinstance(im, torch.Tensor):
+        im = im.data.cpu()
+        if im.shape[0]<5: im=im.permute(1,2,0)
+    elif not isinstance(im, np.ndarray): 
+        im=np.array(im)
+    # Handle 1-channel images
+    if im.shape[-1]==1: 
+        cmap = "gray"
+        im=im[...,0]
+    
+    if figsize is None: 
+        figsize = (_fig_bounds(im.shape[0]), _fig_bounds(im.shape[1]))
+    if ax is None: 
+        _,ax = plt.subplots(figsize=figsize)
+    ax.imshow(im, cmap=cmap, **kwargs)
+    if title is not None: 
+        ax.set_title(title)
+    ax.axis('off')
+    return ax
+
+class ContextUnet(nn.Module):
+    def __init__(self, in_channels, n_feat=256, n_cfeat=10, height=28):  # cfeat - context features
+        super(ContextUnet, self).__init__()
+
+        # number of input channels, number of intermediate feature maps and size of the context vector
+        self.in_channels = in_channels
+        self.n_feat = n_feat
+        self.n_cfeat = n_cfeat
+        self.h = height  #assume h == w. must be divisible by 4, so 28,24,20,16...
+
+        # Initial convolutional block: maps in_channels -> n_feat with a residual connection
+        self.init_conv = ResidualConvBlock(in_channels, n_feat, is_res=True)
+
+        # Down-sampling path of the U-Net with two levels (each level ends in MaxPool2d(2))
+        self.down1 = UnetDown(n_feat, n_feat)        # down1: (batch, n_feat, h/2, w/2)
+        self.down2 = UnetDown(n_feat, 2 * n_feat)    # down2: (batch, 2*n_feat, h/4, w/4)
+        
+         # 4x4 average pool: collapses the down2 map to 1x1 for height == 16 (original used nn.AvgPool2d(7))
+        self.to_vec = nn.Sequential(nn.AvgPool2d((4)), nn.GELU())
+
+        # Embed the timestep (1 value) and context vector (n_cfeat values) with small fully connected networks
+        self.timeembed1 = EmbedFC(1, 2*n_feat)
+        self.timeembed2 = EmbedFC(1, 1*n_feat)
+        self.contextembed1 = EmbedFC(n_cfeat, 2*n_feat)
+        self.contextembed2 = EmbedFC(n_cfeat, 1*n_feat)
+
+        # Initialize the up-sampling path of the U-Net with three levels
+        self.up0 = nn.Sequential(
+            nn.ConvTranspose2d(2 * n_feat, 2 * n_feat, self.h//4, self.h//4), # up-sample a 1x1 map to (h/4, w/4)
+            nn.GroupNorm(8, 2 * n_feat), # normalize
+            nn.ReLU(),
+        )
+        self.up1 = UnetUp(4 * n_feat, n_feat)
+        self.up2 = UnetUp(2 * n_feat, n_feat)
+
+        # Initialize the final convolutional layers to map to the same number of channels as the input image
+        self.out = nn.Sequential(
+            nn.Conv2d(2 * n_feat, n_feat, 3, 1, 1), # reduce number of feature maps   #in_channels, out_channels, kernel_size, stride=1, padding=0
+            nn.GroupNorm(8, n_feat), # normalize
+            nn.ReLU(),
+            nn.Conv2d(n_feat, self.in_channels, 3, 1, 1), # map to same number of channels as input
+        )
+
+    def forward(self, x, t, c=None):
+        """
+        x : (batch, in_channels, h, w) : input image
+        t : one scalar per sample      : time step (the embedding flattens it to (batch, 1))
+        c : (batch, n_cfeat)           : context label, replaced by all zeros when None
+        """
+        # x is the input image, t is the timestep, c is the context label (there is no separate context mask argument)
+
+        # pass the input image through the initial convolutional layer
+        x = self.init_conv(x)
+        # pass the result through the down-sampling path
+        down1 = self.down1(x)       # (batch, n_feat, h/2, w/2)
+        down2 = self.down2(down1)   # (batch, 2*n_feat, h/4, w/4)
+        
+        # convert the feature maps to a vector and apply an activation
+        hiddenvec = self.to_vec(down2)
+        
+        # no context given: use an all-zero context vector with x's dtype and device
+        if c is None:
+            c = torch.zeros(x.shape[0], self.n_cfeat).to(x)
+            
+        # embed context and timestep
+        cemb1 = self.contextembed1(c).view(-1, self.n_feat * 2, 1, 1)     # (batch, 2*n_feat, 1,1)
+        temb1 = self.timeembed1(t).view(-1, self.n_feat * 2, 1, 1)
+        cemb2 = self.contextembed2(c).view(-1, self.n_feat, 1, 1)
+        temb2 = self.timeembed2(t).view(-1, self.n_feat, 1, 1)
+        #print(f"uunet forward: cemb1 {cemb1.shape}. temb1 {temb1.shape}, cemb2 {cemb2.shape}. temb2 {temb2.shape}")
+
+
+        up1 = self.up0(hiddenvec)
+        up2 = self.up1(cemb1*up1 + temb1, down2)  # scale by the context embedding, shift by the time embedding
+        up3 = self.up2(cemb2*up2 + temb2, down1)
+        out = self.out(torch.cat((up3, x), 1))
+        return out
+
+class ResidualConvBlock(nn.Module):
+    def __init__(
+        self, in_channels: int, out_channels: int, is_res: bool = False
+    ) -> None:
+        super().__init__()
+
+        # Check if input and output channels are the same for the residual connection
+        self.same_channels = in_channels == out_channels
+
+        # Flag for whether or not to use residual connection
+        self.is_res = is_res
+
+        # First convolutional layer
+        self.conv1 = nn.Sequential(
+            nn.Conv2d(in_channels, out_channels, 3, 1, 1),   # 3x3 kernel with stride 1 and padding 1
+            nn.BatchNorm2d(out_channels),   # Batch normalization
+            nn.GELU(),   # GELU activation function
+        )
+
+        # Second convolutional layer
+        self.conv2 = nn.Sequential(
+            nn.Conv2d(out_channels, out_channels, 3, 1, 1),   # 3x3 kernel with stride 1 and padding 1
+            nn.BatchNorm2d(out_channels),   # Batch normalization
+            nn.GELU(),   # GELU activation function
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Run conv1 then conv2; with is_res set, add a skip connection and divide the sum by 1.414."""
+        # If using residual connection
+        if self.is_res:
+            # Apply first convolutional layer
+            x1 = self.conv1(x)
+
+            # Apply second convolutional layer
+            x2 = self.conv2(x1)
+
+            # If input and output channels are the same, add residual connection directly
+            if self.same_channels:
+                out = x + x2
+            else:
+                # NOTE(review): this 1x1 conv is built anew on every call, so it is not a registered submodule and its weights are re-initialised each time
+                shortcut = nn.Conv2d(x.shape[1], x2.shape[1], kernel_size=1, stride=1, padding=0).to(x.device)
+                out = shortcut(x) + x2
+            #print(f"resconv forward: x {x.shape}, x1 {x1.shape}, x2 {x2.shape}, out {out.shape}")
+
+            # Scale the summed output by 1/1.414 (approximately 1/sqrt(2))
+            return out / 1.414
+
+        # If not using residual connection, return output of second convolutional layer
+        else:
+            x1 = self.conv1(x)
+            x2 = self.conv2(x1)
+            return x2
+
+    # Method to get the number of output channels for this block
+    def get_out_channels(self):
+        return self.conv2[0].out_channels
+
+    # NOTE(review): only rewrites the channel-count attributes of the two convs; no weight tensor or BatchNorm layer is changed here
+    def set_out_channels(self, out_channels):
+        self.conv1[0].out_channels = out_channels
+        self.conv2[0].in_channels = out_channels
+        self.conv2[0].out_channels = out_channels
+
+        
+
+class UnetUp(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super(UnetUp, self).__init__()
+        
+        # Create a list of layers for the upsampling block
+        # The block consists of a ConvTranspose2d layer for upsampling, followed by two ResidualConvBlock layers
+        layers = [
+            nn.ConvTranspose2d(in_channels, out_channels, 2, 2),
+            ResidualConvBlock(out_channels, out_channels),
+            ResidualConvBlock(out_channels, out_channels),
+        ]
+        
+        # Use the layers to create a sequential model
+        self.model = nn.Sequential(*layers)
+
+    def forward(self, x, skip):
+        # Concatenate the input tensor x with the skip connection tensor along the channel dimension
+        x = torch.cat((x, skip), 1)
+        
+        # Pass the concatenated tensor through the sequential model and return the output
+        x = self.model(x)
+        return x
+
+    
+class UnetDown(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super(UnetDown, self).__init__()
+        
+        # Create a list of layers for the downsampling block
+        # Each block consists of two ResidualConvBlock layers, followed by a MaxPool2d layer for downsampling
+        layers = [ResidualConvBlock(in_channels, out_channels), ResidualConvBlock(out_channels, out_channels), nn.MaxPool2d(2)]
+        
+        # Use the layers to create a sequential model
+        self.model = nn.Sequential(*layers)
+
+    def forward(self, x):
+        # Pass the input through the sequential model and return the output
+        return self.model(x)
+
+class EmbedFC(nn.Module):
+    def __init__(self, input_dim, emb_dim):
+        super(EmbedFC, self).__init__()
+        '''
+        This class defines a generic one layer feed-forward neural network for embedding input data of
+        dimensionality input_dim to an embedding space of dimensionality emb_dim.
+        '''
+        self.input_dim = input_dim
+        
+        # define the layers for the network
+        layers = [
+            nn.Linear(input_dim, emb_dim),
+            nn.GELU(),
+            nn.Linear(emb_dim, emb_dim),
+        ]
+        
+        # create a PyTorch sequential model consisting of the defined layers
+        self.model = nn.Sequential(*layers)
+
+    def forward(self, x):
+        # flatten the input tensor
+        x = x.view(-1, self.input_dim)
+        # apply the model layers to the flattened tensor
+        return self.model(x)
+    
+def unorm(x):
+    # unity norm per channel. results in range of [0,1]; a constant channel maps to 0 instead of NaN
+    # assume x (h,w,3); adding the (span == 0) mask turns a zero range into 1 without changing dtype
+    xmin = x.min((0,1))
+    span = x.max((0,1)) - xmin
+    return (x - xmin)/(span + (span == 0))
+
+def norm_all(store, n_t, n_s):
+    # runs unity norm on all timesteps of all samples
+    nstore = np.zeros_like(store)
+    for t in range(n_t):
+        for s in range(n_s):
+            nstore[t,s] = unorm(store[t,s])
+    return nstore
+
+def norm_torch(x_all):
+    # runs unity norm on every channel of every sample and returns a CPU tensor in [0,1]
+    # input is (n_samples, 3,h,w), the torch image format
+    x = x_all.cpu().numpy()
+    # per-sample, per-channel min and range, kept as (n_samples, 3, 1, 1) so they broadcast over h,w
+    xmin = x.min((2,3), keepdims=True)
+    span = x.max((2,3), keepdims=True) - xmin
+    # a constant channel has span 0: adding the (span == 0) mask divides by 1 there, giving 0 instead of NaN
+    nstore = (x - xmin)/(span + (span == 0))
+    return torch.from_numpy(nstore)
+
+def gen_tst_context(n_cfeat):
+    """
+    Generate test context vectors
+    """
+    vec = torch.tensor([
+    [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1],  [0,0,0,0,0],      # human, non-human, food, spell, side-facing
+    [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1],  [0,0,0,0,0],      # human, non-human, food, spell, side-facing
+    [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1],  [0,0,0,0,0],      # human, non-human, food, spell, side-facing
+    [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1],  [0,0,0,0,0],      # human, non-human, food, spell, side-facing
+    [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1],  [0,0,0,0,0],      # human, non-human, food, spell, side-facing
+    [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1],  [0,0,0,0,0]]      # human, non-human, food, spell, side-facing
+    )
+    return len(vec), vec
+
+def plot_grid(x,n_sample,n_rows,save_dir,w):
+    # x:(n_sample, 3, h, w)
+    ncols = n_sample//n_rows
+    grid = make_grid(norm_torch(x), nrow=ncols)  # curiously, nrow is number of columns.. or number of items in the row.
+    save_image(grid, save_dir + f"run_image_w{w}.png")
+    print('saved image at ' + save_dir + f"run_image_w{w}.png")
+    return grid
+
+def plot_sample(x_gen_store,n_sample,nrows,save_dir, fn,  w, save=False):
+    ncols = n_sample//nrows
+    sx_gen_store = np.moveaxis(x_gen_store,2,4)                               # change to Numpy image format (h,w,channels) vs (channels,h,w)
+    nsx_gen_store = norm_all(sx_gen_store, sx_gen_store.shape[0], n_sample)   # unity norm to put in range [0,1] for np.imshow
+    
+    # create gif of images evolving over time, based on x_gen_store
+    fig, axs = plt.subplots(nrows=nrows, ncols=ncols, sharex=True, sharey=True,figsize=(ncols,nrows))
+    def animate_diff(i, store):
+        print(f'gif animating frame {i} of {store.shape[0]}', end='\r')
+        plots = []
+        for row in range(nrows):
+            for col in range(ncols):
+                axs[row, col].clear()
+                axs[row, col].set_xticks([])
+                axs[row, col].set_yticks([])
+                plots.append(axs[row, col].imshow(store[i,(row*ncols)+col]))
+        return plots
+    ani = FuncAnimation(fig, animate_diff, fargs=[nsx_gen_store],  interval=200, blit=False, repeat=True, frames=nsx_gen_store.shape[0]) 
+    plt.close()
+    if save:
+        ani.save(save_dir + f"{fn}_w{w}.gif", dpi=100, writer=PillowWriter(fps=5))
+        print('saved gif at ' + save_dir + f"{fn}_w{w}.gif")
+    return ani
+
+
+default_tfms = transforms.Compose([
+    transforms.ToTensor(),                # from [0,255] to range [0.0,1.0]
+    transforms.RandomHorizontalFlip(),    # randomly flip and rotate
+    transforms.Normalize((0.5,), (0.5,))  # range [-1,1]
+])
+
+class CustomDataset(Dataset):
+    def __init__(self, sprites, slabels, transform=default_tfms, null_context=False, argmax=False):
+        self.sprites = sprites
+        if argmax:  # collapse each label row to the index of its largest entry
+            self.slabels = np.argmax(slabels, axis=1)
+        else:
+            self.slabels = slabels
+        print(f"sprite shape: {self.sprites.shape}")
+        print(f"labels shape: {self.slabels.shape}")
+        self.transform = transform  # may be None: items are then returned untransformed
+        self.null_context = null_context  # when True every item gets the constant label 0
+
+    @classmethod
+    def from_np(cls, sfilename, lfilename, transform=default_tfms, null_context=False, argmax=False):
+        sprites = np.load(sfilename)
+        slabels = np.load(lfilename)
+        return cls(sprites, slabels, transform, null_context, argmax)
+
+    # Return the number of images in the dataset
+    def __len__(self):
+        return len(self.sprites)
+    
+    def __getitem__(self, idx):
+        "Return `(image, label)` for `idx`; the raw sprite is returned when no transform is set (this used to raise UnboundLocalError)."
+        image = self.sprites[idx]
+        if self.transform:
+            image = self.transform(image)
+        if self.null_context:
+            label = torch.tensor(0).to(torch.int64)
+        else:
+            label = torch.tensor(self.slabels[idx]).to(torch.int64)
+        return (image, label)
+    
+
+    def subset(self, slice_size=1000):
+        # return a random subset of `slice_size` items; labels are passed on as stored, so argmax is not re-applied
+        indices = random.sample(range(len(self)), slice_size)
+        return CustomDataset(self.sprites[indices], self.slabels[indices], self.transform, self.null_context)
+
+    def split(self, pct=0.2):
+        "split dataset into train and test; `pct` is the fraction of items that goes to the test part"
+        train_size = int((1-pct)*len(self))
+        test_size = len(self) - train_size
+        train_dataset, test_dataset = torch.utils.data.random_split(self, [train_size, test_size])
+        return train_dataset, test_dataset
+
+

From e0dbcbe9f6bfc7817f33ed3e32823a9c53868120 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Wed, 5 Jul 2023 18:18:08 +0200
Subject: [PATCH 02/43] data as lfs

---
 .gitattributes                            | 1 +
 dlai/data/sprite_labels_nc_1788_16x16.npy | 3 +++
 dlai/data/sprites_1788_16x16.npy          | 3 +++
 3 files changed, 7 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 dlai/data/sprite_labels_nc_1788_16x16.npy
 create mode 100644 dlai/data/sprites_1788_16x16.npy

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..36dce4d1
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.npy filter=lfs diff=lfs merge=lfs -text
diff --git a/dlai/data/sprite_labels_nc_1788_16x16.npy b/dlai/data/sprite_labels_nc_1788_16x16.npy
new file mode 100644
index 00000000..b5eec1e2
--- /dev/null
+++ b/dlai/data/sprite_labels_nc_1788_16x16.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b71222bd58b57cd99b1b92d830393e86ce215e0f69602f2c82aad1522f030ed7
+size 3576128
diff --git a/dlai/data/sprites_1788_16x16.npy b/dlai/data/sprites_1788_16x16.npy
new file mode 100644
index 00000000..1055e7de
--- /dev/null
+++ b/dlai/data/sprites_1788_16x16.npy
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61cf3b7e3184f57f2bc2bf5e75fbcf08ba379241f58966c62a9716ef581b2916
+size 68659328

From 72f41473bfa2f9c0b7bab9439a8e0f453a17e35b Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Wed, 5 Jul 2023 18:44:00 +0200
Subject: [PATCH 03/43] add model ckpt

---
 .gitattributes                              | 1 +
 dlai/data/weights/context_model_trained.pth | 3 +++
 2 files changed, 4 insertions(+)
 create mode 100644 dlai/data/weights/context_model_trained.pth

diff --git a/.gitattributes b/.gitattributes
index 36dce4d1..a93af385 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,2 @@
 *.npy filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
diff --git a/dlai/data/weights/context_model_trained.pth b/dlai/data/weights/context_model_trained.pth
new file mode 100644
index 00000000..451319cf
--- /dev/null
+++ b/dlai/data/weights/context_model_trained.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51535239b6f3e953db8ff9631278c3e6b133a5a500780bda5092db620ca8f570
+size 5989463

From 4b80deb7da3a275d9ec0d452be2b28ed0838bf28 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 09:28:24 +0200
Subject: [PATCH 04/43] add wandb.login()

---
 dlai/01_diffusion_training.ipynb | 138 +++++++++++++++++++++++++++++--
 dlai/02_diffusion_sampling.ipynb |   4 +-
 2 files changed, 131 insertions(+), 11 deletions(-)

diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index 46530b6b..264e6272 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -33,6 +33,16 @@
     "import wandb"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b88f9513",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# wandb.login() # uncomment if you want to login to wandb"
+   ]
+  },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -61,7 +71,6 @@
     "\n",
     "# network hyperparameters\n",
     "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n",
-    "# device = \"mps\"\n",
     "n_feat = 64 # 64 hidden dimension feature\n",
     "n_cfeat = 5 # context vector is of size 5\n",
     "height = 16 # 16x16 image\n",
@@ -154,7 +163,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "eb13689d",
    "metadata": {},
    "outputs": [],
@@ -184,7 +193,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "8b0f5bed",
    "metadata": {},
    "outputs": [],
@@ -200,7 +209,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "16085a65",
    "metadata": {},
    "outputs": [],
@@ -247,7 +256,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "d88afdba",
    "metadata": {
     "tags": []
@@ -264,10 +273,123 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "id": "f5f4af69",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.4"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/Users/tcapelle/work/edu/dlai/wandb/run-20230705_183609-09mz5kur</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/capecape/dlai_diffusion/runs/09mz5kur' target=\"_blank\">sparkling-frost-38</a></strong> to <a href='https://wandb.ai/capecape/dlai_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/capecape/dlai_diffusion' target=\"_blank\">https://wandb.ai/capecape/dlai_diffusion</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/capecape/dlai_diffusion/runs/09mz5kur' target=\"_blank\">https://wandb.ai/capecape/dlai_diffusion/runs/09mz5kur</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b4565f2a74df4f878429f373cacde283",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "862fdffecb0c40508029ce43d2b9d22f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[10], line 23\u001b[0m\n\u001b[1;32m     21\u001b[0m loss \u001b[39m=\u001b[39m F\u001b[39m.\u001b[39mmse_loss(pred_noise, noise)\n\u001b[1;32m     22\u001b[0m loss\u001b[39m.\u001b[39mbackward()    \n\u001b[0;32m---> 23\u001b[0m optim\u001b[39m.\u001b[39;49mstep()\n\u001b[1;32m     25\u001b[0m \u001b[39m# we log the relevant metrics to the workspace\u001b[39;00m\n\u001b[1;32m     26\u001b[0m wandb\u001b[39m.\u001b[39mlog({\u001b[39m\"\u001b[39m\u001b[39mloss\u001b[39m\u001b[39m\"\u001b[39m: loss\u001b[39m.\u001b[39mitem(),\n\u001b[1;32m     27\u001b[0m            \u001b[39m\"\u001b[39m\u001b[39mlr\u001b[39m\u001b[39m\"\u001b[39m: optim\u001b[39m.\u001b[39mparam_groups[\u001b[39m0\u001b[39m][\u001b[39m'\u001b[39m\u001b[39mlr\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m     28\u001b[0m            \u001b[39m\"\u001b[39m\u001b[39mepoch\u001b[39m\u001b[39m\"\u001b[39m: ep})\n",
+      "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/optimizer.py:280\u001b[0m, in \u001b[0;36mOptimizer.profile_hook_step.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    276\u001b[0m         \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    277\u001b[0m             \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mfunc\u001b[39m}\u001b[39;00m\u001b[39m must return None or a tuple of (new_args, new_kwargs),\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    278\u001b[0m                                \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mbut got \u001b[39m\u001b[39m{\u001b[39;00mresult\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 280\u001b[0m out \u001b[39m=\u001b[39m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m    281\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_optimizer_step_code()\n\u001b[1;32m    283\u001b[0m \u001b[39m# call optimizer step post hooks\u001b[39;00m\n",
+      "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/optimizer.py:33\u001b[0m, in \u001b[0;36m_use_grad_for_differentiable.<locals>._use_grad\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m     31\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m     32\u001b[0m     torch\u001b[39m.\u001b[39mset_grad_enabled(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdefaults[\u001b[39m'\u001b[39m\u001b[39mdifferentiable\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[0;32m---> 33\u001b[0m     ret \u001b[39m=\u001b[39m func(\u001b[39mself\u001b[39;49m, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m     34\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m     35\u001b[0m     torch\u001b[39m.\u001b[39mset_grad_enabled(prev_grad)\n",
+      "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:141\u001b[0m, in \u001b[0;36mAdam.step\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m    130\u001b[0m     beta1, beta2 \u001b[39m=\u001b[39m group[\u001b[39m'\u001b[39m\u001b[39mbetas\u001b[39m\u001b[39m'\u001b[39m]\n\u001b[1;32m    132\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_init_group(\n\u001b[1;32m    133\u001b[0m         group,\n\u001b[1;32m    134\u001b[0m         params_with_grad,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    138\u001b[0m         max_exp_avg_sqs,\n\u001b[1;32m    139\u001b[0m         state_steps)\n\u001b[0;32m--> 141\u001b[0m     adam(\n\u001b[1;32m    142\u001b[0m         params_with_grad,\n\u001b[1;32m    143\u001b[0m         grads,\n\u001b[1;32m    144\u001b[0m         exp_avgs,\n\u001b[1;32m    145\u001b[0m         exp_avg_sqs,\n\u001b[1;32m    146\u001b[0m         max_exp_avg_sqs,\n\u001b[1;32m    147\u001b[0m         state_steps,\n\u001b[1;32m    148\u001b[0m         amsgrad\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mamsgrad\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    149\u001b[0m         beta1\u001b[39m=\u001b[39;49mbeta1,\n\u001b[1;32m    150\u001b[0m         beta2\u001b[39m=\u001b[39;49mbeta2,\n\u001b[1;32m    151\u001b[0m         lr\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mlr\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    152\u001b[0m         weight_decay\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mweight_decay\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    153\u001b[0m         eps\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39meps\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    154\u001b[0m         maximize\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mmaximize\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    155\u001b[0m         
foreach\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mforeach\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    156\u001b[0m         capturable\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mcapturable\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    157\u001b[0m         differentiable\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mdifferentiable\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    158\u001b[0m         fused\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mfused\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    159\u001b[0m         grad_scale\u001b[39m=\u001b[39;49m\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mgrad_scale\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m    160\u001b[0m         found_inf\u001b[39m=\u001b[39;49m\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mfound_inf\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m    161\u001b[0m     )\n\u001b[1;32m    163\u001b[0m \u001b[39mreturn\u001b[39;00m loss\n",
+      "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:281\u001b[0m, in \u001b[0;36madam\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach, capturable, differentiable, fused, grad_scale, found_inf, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)\u001b[0m\n\u001b[1;32m    278\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    279\u001b[0m     func \u001b[39m=\u001b[39m _single_tensor_adam\n\u001b[0;32m--> 281\u001b[0m func(params,\n\u001b[1;32m    282\u001b[0m      grads,\n\u001b[1;32m    283\u001b[0m      exp_avgs,\n\u001b[1;32m    284\u001b[0m      exp_avg_sqs,\n\u001b[1;32m    285\u001b[0m      max_exp_avg_sqs,\n\u001b[1;32m    286\u001b[0m      state_steps,\n\u001b[1;32m    287\u001b[0m      amsgrad\u001b[39m=\u001b[39;49mamsgrad,\n\u001b[1;32m    288\u001b[0m      beta1\u001b[39m=\u001b[39;49mbeta1,\n\u001b[1;32m    289\u001b[0m      beta2\u001b[39m=\u001b[39;49mbeta2,\n\u001b[1;32m    290\u001b[0m      lr\u001b[39m=\u001b[39;49mlr,\n\u001b[1;32m    291\u001b[0m      weight_decay\u001b[39m=\u001b[39;49mweight_decay,\n\u001b[1;32m    292\u001b[0m      eps\u001b[39m=\u001b[39;49meps,\n\u001b[1;32m    293\u001b[0m      maximize\u001b[39m=\u001b[39;49mmaximize,\n\u001b[1;32m    294\u001b[0m      capturable\u001b[39m=\u001b[39;49mcapturable,\n\u001b[1;32m    295\u001b[0m      differentiable\u001b[39m=\u001b[39;49mdifferentiable,\n\u001b[1;32m    296\u001b[0m      grad_scale\u001b[39m=\u001b[39;49mgrad_scale,\n\u001b[1;32m    297\u001b[0m      found_inf\u001b[39m=\u001b[39;49mfound_inf)\n",
+      "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:344\u001b[0m, in \u001b[0;36m_single_tensor_adam\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, grad_scale, found_inf, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize, capturable, differentiable)\u001b[0m\n\u001b[1;32m    341\u001b[0m     param \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mview_as_real(param)\n\u001b[1;32m    343\u001b[0m \u001b[39m# Decay the first and second moment running average coefficient\u001b[39;00m\n\u001b[0;32m--> 344\u001b[0m exp_avg\u001b[39m.\u001b[39;49mmul_(beta1)\u001b[39m.\u001b[39madd_(grad, alpha\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m beta1)\n\u001b[1;32m    345\u001b[0m exp_avg_sq\u001b[39m.\u001b[39mmul_(beta2)\u001b[39m.\u001b[39maddcmul_(grad, grad\u001b[39m.\u001b[39mconj(), value\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m beta2)\n\u001b[1;32m    347\u001b[0m \u001b[39mif\u001b[39;00m capturable \u001b[39mor\u001b[39;00m differentiable:\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
    "source": [
     "# same code as before, added comments on the extra W&B instrumentation lines\n",
     "# create a wandb run\n",
@@ -301,7 +423,7 @@
     "    # save model periodically\n",
     "    if ep%4==0 or ep == int(n_epoch-1):\n",
     "        nn_model.eval()\n",
-    "        ckpt_file = save_dir + f\"context_model_{ep}.pth\"\n",
+    "        ckpt_file = save_dir + f\"context_model.pth\"\n",
     "        torch.save(nn_model.state_dict(), ckpt_file)\n",
     "\n",
     "        # save model to wandb as an Artifact\n",
diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
index 23cd1f26..0cc8f093 100644
--- a/dlai/02_diffusion_sampling.ipynb
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -61,8 +61,6 @@
     "\n",
     "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n",
     "\n",
-    "device = \"mps\"\n",
-    "\n",
     "# we are storing the parameters in a dictionary to be logged to wandb\n",
     "config = dict(\n",
     "    timesteps=timesteps,\n",
@@ -101,7 +99,7 @@
     "    producer_run = artifact.logged_by()\n",
     "\n",
     "    # load the weights dictionary\n",
-    "    model_weights = torch.load(model_path + f\"/context_model_31.pth\", map_location=\"cpu\")\n",
+    "    model_weights = torch.load(model_path + f\"/context_model.pth\", map_location=\"cpu\")\n",
     "\n",
     "    # create the model\n",
     "    model = ContextUnet(in_channels=3, \n",

From ba00e4ec22bc29b447af86c8526c163831a05ab0 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 11:21:15 +0000
Subject: [PATCH 05/43] llm train nb

---
 dlai/04_train_llm.ipynb | 575 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 575 insertions(+)
 create mode 100644 dlai/04_train_llm.ipynb

diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb
new file mode 100644
index 00000000..ae128ebc
--- /dev/null
+++ b/dlai/04_train_llm.ipynb
@@ -0,0 +1,575 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "fe0ed738-872b-4952-baf8-b3f7c92214a0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pip install transformers accelerate datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "f64e095c-d28b-44aa-a122-4121c5c66a5b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "from transformers import AutoTokenizer\n",
+    "from datasets import load_dataset\n",
+    "from transformers import AutoModelForCausalLM\n",
+    "from transformers import Trainer, TrainingArguments"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3fd80268-c4a1-4e1a-aed3-cd5c3ab4d48f",
+   "metadata": {},
+   "source": [
+    "Load a dataset from the Hugging Face Hub"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a7535b8b-d220-44e8-a56c-97e250c36596",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds = load_dataset('wikitext', 'wikitext-2-raw-v1')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "2286ae41-213d-480d-a4ba-8c4e2e1c4771",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_checkpoint = \"roneneldan/TinyStories-1M\"  # distilgpt2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "26dfa0b7-8d9f-44f3-9e09-bc12bcb5ae0b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f4f7a79d-9519-4133-a8cd-0a2bc59ee97b",
+   "metadata": {},
+   "source": [
+    "We can now call the tokenizer on all our texts. This is very simple, using the map method from the Datasets library. First we define a function that calls the tokenizer on our texts:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "0ea05869-8ece-4a82-b9d4-3a62a84b6a77",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def tokenize_function(examples):\n",
+    "    return tokenizer(examples[\"text\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0413ebca-019b-49dc-b042-cf3cb20bf26c",
+   "metadata": {},
+   "source": [
+    "Then we apply it to all the splits in our `ds` object, using `batched=True` and 4 processes to speed up the preprocessing. We won't need the `text` column afterward, so we discard it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "22368c91-ddf8-4b08-848e-f732ff155494",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/4358 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/36718 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/3760 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "tokenized_datasets = ds.map(tokenize_function, batched=True, num_proc=4, remove_columns=[\"text\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f5880220-3b8b-414e-9e9a-6e6541784417",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "d59cc8a9-5f87-4eb7-abbc-f4fc18fea51d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'input_ids': [796, 569, 18354, 7496, 17740, 6711, 796, 220, 198],\n",
+       " 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tokenized_datasets[\"train\"][1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "70bc12ae-52dc-47ad-b9ef-1e5b8af829e8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "' = Valkyria Chronicles III = \\n'"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tokenizer.decode(tokenized_datasets[\"train\"][1][\"input_ids\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "85c6ad00-3825-4f12-be49-8ff336d5d398",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "block_size = 128\n",
+    "\n",
+    "def group_texts(examples):\n",
+    "    # Concatenate all texts.\n",
+    "    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}\n",
+    "    total_length = len(concatenated_examples[list(examples.keys())[0]])\n",
+    "    # We drop the small remainder. We could pad instead if the model supported it;\n",
+    "    # customize this part to your needs.\n",
+    "    total_length = (total_length // block_size) * block_size\n",
+    "    # Split into chunks of block_size.\n",
+    "    result = {\n",
+    "        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]\n",
+    "        for k, t in concatenated_examples.items()\n",
+    "    }\n",
+    "    result[\"labels\"] = result[\"input_ids\"].copy()\n",
+    "    return result"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "26a73750-5e38-4236-a5c3-b356d8041dc3",
+   "metadata": {},
+   "source": [
+    "First note that we duplicate the inputs for our labels. This is because the models in the 🤗 Transformers library shift the labels to the right internally, so we don't need to do it manually.\n",
+    "\n",
+    "Also note that by default, the `map` method will send a batch of 1,000 examples to be treated by the preprocessing function. So here, we will drop the remainder to make the concatenated tokenized texts a multiple of `block_size` every 1,000 examples. You can adjust this behavior by passing a higher batch size (which will also be processed more slowly). You can also speed up the preprocessing by using multiprocessing:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "572f29c8-84d3-45b9-b8df-26de8c22bc91",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/4358 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/36718 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=4):   0%|          | 0/3760 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "lm_datasets = tokenized_datasets.map(\n",
+    "    group_texts,\n",
+    "    batched=True,\n",
+    "    batch_size=1000,\n",
+    "    num_proc=4,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "a9e1a750-f115-46a8-b8fb-a4cb5d27d954",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "' game and follows the \" Nameless \", a penal military unit serving the nation of Gallia during the Second Europan War who perform secret black operations and are pitted against the Imperial unit \" Calamaty Raven \". \\n The game began development in 2010, carrying over a large portion of the work done on Valkyria Chronicles II. While it retained the standard features of the series, it also underwent multiple adjustments, such as making the game more forgiving for series newcomers. Character designer Raita Honjou and composer Hitoshi Sakimoto both returned from previous entries, along with Valkyria Chronicles II director Takeshi Oz'"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tokenizer.decode(lm_datasets[\"train\"][1][\"input_ids\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "b4f131eb-979e-40f6-9e28-19756beaa8e4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "183ccb9277844038a6aa86466d58ac1c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)lve/main/config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ba4f041c71ca467482a4137c78f882c7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading pytorch_model.bin:   0%|          | 0.00/48.6M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "model = AutoModelForCausalLM.from_pretrained(model_checkpoint)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "b99d656a-2ea8-4aca-8809-0d6c9a4aa15d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "7345ab23-8d12-4d4c-a39d-bb2202bff218",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ[\"WANDB_PROJECT\"] = \"tiny-stories\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "d74ee155-3c30-4ef2-9c4d-fd8ee222c50c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_name = model_checkpoint.split(\"/\")[-1]\n",
+    "training_args = TrainingArguments(\n",
+    "    f\"{model_name}-finetuned-wikitext2\",\n",
+    "    report_to=\"wandb\",\n",
+    "    logging_steps=1,\n",
+    "    evaluation_strategy = \"epoch\",\n",
+    "    learning_rate=2e-5,\n",
+    "    weight_decay=0.01,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "af62105f-a478-436f-88a2-5c1d78b9d20a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trainer = Trainer(\n",
+    "    model=model,\n",
+    "    args=training_args,\n",
+    "    train_dataset=lm_datasets[\"train\"],\n",
+    "    eval_dataset=lm_datasets[\"validation\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "01958a56-c22a-4a27-bc71-41c59fc97f05",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33manony-moose-57595088200203951\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.5"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_104512-3u6izcp0</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/anony-moose-57595088200203951/tiny-stories/runs/3u6izcp0?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">fallen-voice-1</a></strong> to <a href='https://wandb.ai/anony-moose-57595088200203951/tiny-stories?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/anony-moose-57595088200203951/tiny-stories?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/tiny-stories?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/anony-moose-57595088200203951/tiny-stories/runs/3u6izcp0?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/tiny-stories/runs/3u6izcp0?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Do NOT share these links with anyone. They can be used to claim your runs."
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='7002' max='7002' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [7002/7002 03:51, Epoch 3/3]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>6.471900</td>\n",
+       "      <td>6.374047</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2</td>\n",
+       "      <td>5.812100</td>\n",
+       "      <td>6.196322</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>3</td>\n",
+       "      <td>5.659900</td>\n",
+       "      <td>6.153089</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "TrainOutput(global_step=7002, training_loss=6.346599991952852, metrics={'train_runtime': 237.6024, 'train_samples_per_second': 235.679, 'train_steps_per_second': 29.469, 'total_flos': 17136527671296.0, 'train_loss': 6.346599991952852, 'epoch': 3.0})"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "trainer.train()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "36f0eef0-8729-4d32-a2aa-b1577847f7f5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 8235b76357cb708986b8bea0aa53325d19f331fb Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 11:23:33 +0000
Subject: [PATCH 06/43] split wandb calls

---
 dlai/03_llm_eval.ipynb | 502 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 469 insertions(+), 33 deletions(-)

diff --git a/dlai/03_llm_eval.ipynb b/dlai/03_llm_eval.ipynb
index 0a030e22..e4d6cdef 100644
--- a/dlai/03_llm_eval.ipynb
+++ b/dlai/03_llm_eval.ipynb
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "98d52240-af93-4c87-a11e-309b23bdae9c",
    "metadata": {},
    "outputs": [],
@@ -26,7 +26,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "6512739b-fe35-4901-acb3-05df46b5ed9c",
    "metadata": {},
    "outputs": [],
@@ -57,17 +57,108 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b",
+   "execution_count": 3,
+   "id": "83639bac-5860-4db1-9867-7c89f3ca25a6",
    "metadata": {},
    "outputs": [],
    "source": [
-    "autolog({\"project\":\"deeplearningai-llm\", \"job_type\": \"generation\"})"
+    "PROJECT = \"deeplearningai-llm\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
+   "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33manony-moose-57595088200203951\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.5"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_085050-g2v28ryo</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">revived-dew-9</a></strong> to <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Do NOT share these links with anyone. They can be used to claim your runs."
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "autolog({\"project\":PROJECT, \"job_type\": \"generation\", \"anonymous\":\"allow\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
    "id": "b2ab394b-295b-4cfa-aade-aa274003a56a",
    "metadata": {},
    "outputs": [],
@@ -79,7 +170,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "076e62a1-188f-47e1-bda9-5e3619e7d4ba",
    "metadata": {},
    "outputs": [],
@@ -89,7 +180,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "736fe64f-5cca-4316-8842-588b948193de",
    "metadata": {},
    "outputs": [],
@@ -111,20 +202,93 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "197a256e-834f-42ee-8680-0e5cc53903cb",
    "metadata": {},
    "outputs": [],
    "source": [
-    "openai.api_key = os.environ[\"OPENAI_API_KEY\"]"
+    "# openai.api_key = os.environ[\"OPENAI_API_KEY\"]  # this probably needs some setup on their platform"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "690e6e0a-193b-41c8-86c4-526f8061dd94",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Artifacts logged anonymously cannot be claimed and expire after 7 days.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Euphoric Guardian                                                                                                  \n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "Euphoric Guardian                                                                                                  \n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Harmony's Chosen                                                                                                   \n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "Harmony's Chosen                                                                                                   \n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\"Hilaria: The Laughing Guardian\"                                                                                   \n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "\"Hilaria: The Laughing Guardian\"                                                                                   \n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Unity's Embrace                                                                                                    \n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "Unity's Embrace                                                                                                    \n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Harmonic Defender                                                                                                  \n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "Harmonic Defender                                                                                                  \n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "system_prompt = \"\"\"You are a creative copywriter.\n",
     "You're given a category of game asset, and your goal is to design a name of that asset.\n",
@@ -135,15 +299,166 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "id": "8343121b-2d47-47d1-b343-ec2393b8f02f",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Artifacts logged anonymously cannot be claimed and expire after 7 days.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Harmony Gems                                                                                                       \n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "Harmony Gems                                                                                                       \n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Gleam Haven                                                                                                        \n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "Gleam Haven                                                                                                        \n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Gleaming Harmony Gem                                                                                               \n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "Gleaming Harmony Gem                                                                                               \n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Gleamstone                                                                                                         \n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "Gleamstone                                                                                                         \n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Harmony Gems                                                                                                       \n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "Harmony Gems                                                                                                       \n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "user_prompt = \"jewel\"\n",
     "generate_and_print(system_prompt, user_prompt)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "3266487e-150b-4dd8-9555-94e94a66aac1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "084faecfc3a6412baa45b2aca421f0e1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(Label(value='0.015 MB of 0.015 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<style>\n",
+       "    table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
+       "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
+       "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
+       "    </style>\n",
+       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>█▁</td></tr><tr><td>usage/elapsed_time</td><td>█▁</td></tr><tr><td>usage/prompt_tokens</td><td>▁█</td></tr><tr><td>usage/total_tokens</td><td>█▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>19</td></tr><tr><td>usage/elapsed_time</td><td>0.74012</td></tr><tr><td>usage/prompt_tokens</td><td>62</td></tr><tr><td>usage/total_tokens</td><td>81</td></tr></table><br/></div></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run <strong style=\"color:#cdcd00\">revived-dew-9</strong> at: <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a><br/>Synced 6 W&B file(s), 2 media file(s), 2 artifact file(s) and 0 other file(s)"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Find logs at: <code>./wandb/run-20230706_085050-g2v28ryo/logs</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "wandb.finish()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "16d6d513-389d-4c67-a942-a922bce6ff1a",
@@ -156,7 +471,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "id": "3c9fd404-51fd-44cf-b41e-b81dc589a4af",
    "metadata": {},
    "outputs": [],
@@ -170,17 +485,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "id": "9d366dca-db12-4532-a98d-5b29fa8a0b71",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'a mystic medieval island inhabited by intelligent and funny frogs'"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "random.choice(worlds)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "id": "0db1e20a-87a8-4386-9a8d-727db9569cd7",
    "metadata": {},
    "outputs": [],
@@ -196,7 +522,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "id": "1a86f95e-ed0d-4989-8c1d-5b88cdac7999",
    "metadata": {},
    "outputs": [],
@@ -283,32 +609,144 @@
     "    root_span._span.end_time_ms = llm_end_time_ms\n",
     "\n",
     "    # part 6 - log all spans to W&B by logging the root span\n",
-    "    root_span.log(name=\"creative_trace\")\n",
+    "    root_span.log(name=\"trace\")\n",
     "    print(f\"Result: {response_text}\")\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "id": "f8500843-6d4b-4fc6-93b9-4cadf5813e4a",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e28b19b74fd0478381e1f535cf6a2655",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670242549995842, max=1.0…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.5"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_085107-3awycdve</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/3awycdve?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">faithful-leaf-10</a></strong> to <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/3awycdve?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/3awycdve?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Do NOT share these links with anyone. They can be used to claim your runs."
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Result: Croakvalor\n"
+     ]
+    }
+   ],
    "source": [
-    "openai.api_key = os.environ[\"OPENAI_API_KEY\"]\n",
+    "wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")\n",
     "\n",
     "run_creative_chain(\"hero\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "id": "538d7bf3-4ae1-4b57-8a96-a34ea0614ec3",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Result: Volcanium Gem\n"
+     ]
+    }
+   ],
    "source": [
     "run_creative_chain(\"jewel\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45de1fb0-3630-4673-8ac0-0dffe0a52071",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wandb.finish()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "1ccc075f-32bf-4451-b7ad-ab2a49cc86b6",
@@ -322,19 +760,17 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "45de1fb0-3630-4673-8ac0-0dffe0a52071",
+   "cell_type": "markdown",
+   "id": "d05ae137-04ef-4f7c-9cdd-67b71b92db3a",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# "
+    "## Finish the run"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "82c7ab14-4335-4649-95b4-35fb8023af1d",
+   "id": "5738431a-e281-4abf-9837-44fec6811ff4",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -342,7 +778,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -356,7 +792,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.11.3"
   }
  },
  "nbformat": 4,

From 1c419f7ca3276e6e56dc5ab9c3776ba7d22f2fc1 Mon Sep 17 00:00:00 2001
From: kldarek <darek.kleczek@gmail.com>
Date: Thu, 6 Jul 2023 13:42:55 +0200
Subject: [PATCH 07/43] add llm agent

---
 dlai/03_llm_eval.ipynb | 696 ++++++++++++-----------------------------
 1 file changed, 199 insertions(+), 497 deletions(-)

diff --git a/dlai/03_llm_eval.ipynb b/dlai/03_llm_eval.ipynb
index e4d6cdef..797527c1 100644
--- a/dlai/03_llm_eval.ipynb
+++ b/dlai/03_llm_eval.ipynb
@@ -1,20 +1,8 @@
 {
  "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "53c0d4d6-3d2b-45e5-90fa-ba7953496ec2",
-   "metadata": {},
-   "source": [
-    "# LLM Tracing with W&B\n",
-    "\n",
-    "## 1. Auto-logging\n",
-    "\n",
-    "In this section, we will call OpenAI LLM to generate names of our game assets. We will use W&B autologging, also available for other popular LLMs and libraries like ... "
-   ]
-  },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "98d52240-af93-4c87-a11e-309b23bdae9c",
    "metadata": {},
    "outputs": [],
@@ -24,9 +12,21 @@
     "# !pip install ./wandb-addons[prompts] openai wandb -qqq"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "53c0d4d6-3d2b-45e5-90fa-ba7953496ec2",
+   "metadata": {},
+   "source": [
+    "# LLM Tracing with W&B\n",
+    "\n",
+    "## 1. Auto-logging\n",
+    "\n",
+    "In this section, we will call an OpenAI LLM to generate names for our game assets. We will use W&B autologging, which is also available for other popular LLMs and libraries such as Cohere or HuggingFace Pipelines. "
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "6512739b-fe35-4901-acb3-05df46b5ed9c",
    "metadata": {},
    "outputs": [],
@@ -37,11 +37,6 @@
     "import datetime\n",
     "\n",
     "import openai\n",
-    "import tiktoken\n",
-    "\n",
-    "from pathlib import Path\n",
-    "from pprint import pprint\n",
-    "from getpass import getpass\n",
     "\n",
     "from rich.markdown import Markdown\n",
     "import pandas as pd\n",
@@ -57,7 +52,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "83639bac-5860-4db1-9867-7c89f3ca25a6",
    "metadata": {},
    "outputs": [],
@@ -67,98 +62,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33manony-moose-57595088200203951\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "Tracking run with wandb version 0.15.5"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_085050-g2v28ryo</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">revived-dew-9</a></strong> to <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View project at <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run at <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Do NOT share these links with anyone. They can be used to claim your runs."
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "autolog({\"project\":PROJECT, \"job_type\": \"generation\", \"anonymous\":\"allow\"})"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "b2ab394b-295b-4cfa-aade-aa274003a56a",
    "metadata": {},
    "outputs": [],
@@ -170,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "076e62a1-188f-47e1-bda9-5e3619e7d4ba",
    "metadata": {},
    "outputs": [],
@@ -180,7 +94,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "736fe64f-5cca-4316-8842-588b948193de",
    "metadata": {},
    "outputs": [],
@@ -202,93 +116,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "id": "197a256e-834f-42ee-8680-0e5cc53903cb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# openai.api_key = os.environ[\"OPENAI_API_KEY\"]  # this probably needs some setup on their platform"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "690e6e0a-193b-41c8-86c4-526f8061dd94",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Artifacts logged anonymously cannot be claimed and expire after 7 days.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Euphoric Guardian                                                                                                  \n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "Euphoric Guardian                                                                                                  \n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Harmony's Chosen                                                                                                   \n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "Harmony's Chosen                                                                                                   \n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\"Hilaria: The Laughing Guardian\"                                                                                   \n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "\"Hilaria: The Laughing Guardian\"                                                                                   \n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Unity's Embrace                                                                                                    \n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "Unity's Embrace                                                                                                    \n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Harmonic Defender                                                                                                  \n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "Harmonic Defender                                                                                                  \n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "system_prompt = \"\"\"You are a creative copywriter.\n",
     "You're given a category of game asset, and your goal is to design a name of that asset.\n",
@@ -299,83 +130,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "id": "8343121b-2d47-47d1-b343-ec2393b8f02f",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Artifacts logged anonymously cannot be claimed and expire after 7 days.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Harmony Gems                                                                                                       \n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "Harmony Gems                                                                                                       \n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Gleam Haven                                                                                                        \n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "Gleam Haven                                                                                                        \n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Gleaming Harmony Gem                                                                                               \n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "Gleaming Harmony Gem                                                                                               \n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Gleamstone                                                                                                         \n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "Gleamstone                                                                                                         \n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Harmony Gems                                                                                                       \n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "Harmony Gems                                                                                                       \n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "user_prompt = \"jewel\"\n",
     "generate_and_print(system_prompt, user_prompt)"
@@ -383,78 +141,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "id": "3266487e-150b-4dd8-9555-94e94a66aac1",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "084faecfc3a6412baa45b2aca421f0e1",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "VBox(children=(Label(value='0.015 MB of 0.015 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<style>\n",
-       "    table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
-       "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
-       "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
-       "    </style>\n",
-       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>█▁</td></tr><tr><td>usage/elapsed_time</td><td>█▁</td></tr><tr><td>usage/prompt_tokens</td><td>▁█</td></tr><tr><td>usage/total_tokens</td><td>█▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>19</td></tr><tr><td>usage/elapsed_time</td><td>0.74012</td></tr><tr><td>usage/prompt_tokens</td><td>62</td></tr><tr><td>usage/total_tokens</td><td>81</td></tr></table><br/></div></div>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run <strong style=\"color:#cdcd00\">revived-dew-9</strong> at: <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a><br/>Synced 6 W&B file(s), 2 media file(s), 2 artifact file(s) and 0 other file(s)"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Find logs at: <code>./wandb/run-20230706_085050-g2v28ryo/logs</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "wandb.finish()"
    ]
@@ -466,12 +156,12 @@
    "source": [
     "## 2. Using Tracer to log more complex chains\n",
     "\n",
-    "How can we get more creative outputs? Let's design an LLM chain that will first randomly pick a fantasy world, and then generate character names. We will demonstrate how to use Tracer in such scenario. You can also use our native integration with libraries like Langchain or Llamaindex instead. "
+    "How can we get more creative outputs? Let's design an LLM chain that will first randomly pick a fantasy world, and then generate character names. We will demonstrate how to use Tracer in such a scenario."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "id": "3c9fd404-51fd-44cf-b41e-b81dc589a4af",
    "metadata": {},
    "outputs": [],
@@ -485,28 +175,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
-   "id": "9d366dca-db12-4532-a98d-5b29fa8a0b71",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'a mystic medieval island inhabited by intelligent and funny frogs'"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "random.choice(worlds)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "id": "0db1e20a-87a8-4386-9a8d-727db9569cd7",
    "metadata": {},
    "outputs": [],
@@ -522,7 +191,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "id": "1a86f95e-ed0d-4989-8c1d-5b88cdac7999",
    "metadata": {},
    "outputs": [],
@@ -533,20 +202,11 @@
     "\n",
     "    root_span = Trace(\n",
     "          name=\"MyCreativeChain\",\n",
-    "          kind=\"agent\",\n",
+    "          kind=\"chain\",\n",
     "          start_time_ms=start_time_ms,\n",
     "          metadata={\"user\": \"student_1\"})\n",
     "\n",
-    "    # part 2 - The chain calls into a child chain..\n",
-    "    chain_span = Trace(\n",
-    "          name=\"MyChain\",\n",
-    "          kind=\"chain\",\n",
-    "          start_time_ms=start_time_ms)\n",
-    "\n",
-    "    # add the Chain span as a child of the root\n",
-    "    root_span.add_child(chain_span)\n",
-    "\n",
-    "    # part 3 - your chain picks a fantasy world\n",
+    "    # part 2 - your chain picks a fantasy world\n",
     "    time.sleep(3)\n",
     "    world = random.choice(worlds)\n",
     "    expanded_prompt = f'Game asset category: {query}; fantasy world description: {world}'\n",
@@ -563,9 +223,9 @@
     "          outputs={\"result\": expanded_prompt})\n",
     "\n",
     "    # add the TOOL span as a child of the root\n",
-    "    chain_span.add_child(tool_span)\n",
+    "    root_span.add_child(tool_span)\n",
     "\n",
-    "    # part 4 - the LLMChain calls an OpenAI LLM...\n",
+    "    # part 3 - the LLMChain calls an OpenAI LLM...\n",
     "    messages=[\n",
     "      {\"role\": \"system\", \"content\": system_message},\n",
     "      {\"role\": \"user\", \"content\": expanded_prompt}\n",
@@ -593,126 +253,27 @@
     "          )\n",
     "\n",
     "    # add the LLM span as a child of the Chain span...\n",
-    "    chain_span.add_child(llm_span)\n",
+    "    root_span.add_child(llm_span)\n",
     "\n",
     "    # update the end time of the Chain span\n",
-    "    chain_span.add_inputs_and_outputs(\n",
+    "    root_span.add_inputs_and_outputs(\n",
     "          inputs={\"query\":query},\n",
     "          outputs={\"response\": response_text})\n",
     "\n",
     "    # update the Chain span's end time\n",
-    "    chain_span._span.end_time_ms = llm_end_time_ms\n",
-    "\n",
-    "    # part 5 - the final results from the tool are added \n",
-    "    root_span.add_inputs_and_outputs(inputs={\"query\": query},\n",
-    "                                     outputs={\"result\": response_text})\n",
     "    root_span._span.end_time_ms = llm_end_time_ms\n",
     "\n",
-    "    # part 6 - log all spans to W&B by logging the root span\n",
-    "    root_span.log(name=\"trace\")\n",
+    "    # part 4 - log all spans to W&B by logging the root span\n",
+    "    root_span.log(name=\"creative_trace\")\n",
     "    print(f\"Result: {response_text}\")\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "id": "f8500843-6d4b-4fc6-93b9-4cadf5813e4a",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e28b19b74fd0478381e1f535cf6a2655",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670242549995842, max=1.0…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Tracking run with wandb version 0.15.5"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_085107-3awycdve</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/3awycdve?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">faithful-leaf-10</a></strong> to <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View project at <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run at <a href='https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/3awycdve?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/3awycdve?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Do NOT share these links with anyone. They can be used to claim your runs."
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Result: Croakvalor\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")\n",
     "\n",
@@ -721,18 +282,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "id": "538d7bf3-4ae1-4b57-8a96-a34ea0614ec3",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Result: Volcanium Gem\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "run_creative_chain(\"jewel\")"
    ]
@@ -754,17 +307,27 @@
    "source": [
     "## Langchain agent\n",
     "\n",
-    "WIP: add langchain agent - adding names and evaluating if they are good. Wrap a previous function as a langchain tool. \n",
-    "\n",
-    "Demonstrate W&B Tracer autologging. "
+    "In the third scenario, we'll introduce an agent that will use tools such as WorldPicker and NameValidator to come up with the ultimate name. We will also use Langchain here and demonstrate its W&B integration."
    ]
   },
   {
-   "cell_type": "markdown",
-   "id": "d05ae137-04ef-4f7c-9cdd-67b71b92db3a",
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "726e0a6a-699b-434d-8c51-7542b4f981dd",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "## Finish the run"
+    "# Import things that are needed generically\n",
+    "from langchain.agents import AgentType, initialize_agent\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.tools import BaseTool\n",
+    "\n",
+    "from typing import Optional\n",
+    "\n",
+    "from langchain.callbacks.manager import (\n",
+    "    AsyncCallbackManagerForToolRun,\n",
+    "    CallbackManagerForToolRun,\n",
+    ")"
    ]
   },
   {
@@ -773,12 +336,151 @@
    "id": "5738431a-e281-4abf-9837-44fec6811ff4",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ac08f78b-0962-4d84-b39a-21ee5e5d606b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "os.environ[\"LANGCHAIN_WANDB_TRACING\"] = \"true\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "539bc081-d1e3-4376-a817-23aa1d7ab2b3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class WorldPickerTool(BaseTool):\n",
+    "    name = \"pick_world\"\n",
+    "    description = \"pick a virtual game world for your character or item naming\"\n",
+    "    worlds = [\n",
+    "                \"a mystic medieval island inhabited by intelligent and funny frogs\",\n",
+    "                \"a modern anthill featuring a cyber-ant queen and her cyber-ant-workers\",\n",
+    "                \"a digital world inhabited by friendly machine learning engineers\"\n",
+    "            ]\n",
+    "\n",
+    "    def _run(\n",
+    "        self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None\n",
+    "    ) -> str:\n",
+    "        \"\"\"Use the tool.\"\"\"\n",
+    "        time.sleep(1)\n",
+    "        return random.choice(self.worlds)\n",
+    "\n",
+    "    async def _arun(\n",
+    "        self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None\n",
+    "    ) -> str:\n",
+    "        \"\"\"Use the tool asynchronously.\"\"\"\n",
+    "        raise NotImplementedError(\"custom_search does not support async\")\n",
+    "        \n",
+    "class NameValidatorTool(BaseTool):\n",
+    "    name = \"validate_name\"\n",
+    "    description = \"validate if the name is properly generated\"\n",
+    "\n",
+    "    def _run(\n",
+    "        self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None\n",
+    "    ) -> str:\n",
+    "        \"\"\"Use the tool.\"\"\"\n",
+    "        time.sleep(1)\n",
+    "        if len(query) < 20:\n",
+    "            return f\"This is a correct name: {query}\"\n",
+    "        else:\n",
+    "            return f\"This name is too long. It should be shorter than 20 characters.\"\n",
+    "\n",
+    "    async def _arun(\n",
+    "        self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None\n",
+    "    ) -> str:\n",
+    "        \"\"\"Use the tool asynchronously.\"\"\"\n",
+    "        raise NotImplementedError(\"custom_search does not support async\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c515ee33-1d6f-47e7-aceb-845c363eee29",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = ChatOpenAI(temperature=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "989407f4-0e10-4446-90d1-992c3b4c9483",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tools = [WorldPickerTool(), NameValidatorTool()]\n",
+    "agent = initialize_agent(\n",
+    "    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2d4bd42d-9c95-4e02-8679-99ca43d0aa71",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "agent.run(\n",
+    "    \"Find a virtual game world for me and imagine the name of a hero in that world\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dbb5ea87-a9b9-462f-80bf-b56d681dec8c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "agent.run(\n",
+    "    \"Find a virtual game world for me and imagine the name of a jewel in that world\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4d101fcd-cd7d-4ede-ad95-412c1cd72e46",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "agent.run(\n",
+    "    \"Find a virtual game world for me and imagine the name of food in that world\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "486c688c-2ca2-4fe5-8f22-afd194b3e34d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wandb.finish()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18b6af79-9de7-4bfd-b8ea-6b4f2b405d0a",
+   "metadata": {},
+   "outputs": [],
    "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -792,7 +494,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.8.13"
   }
  },
  "nbformat": 4,

From bff4b3127e598621fd32e1655ba98913fc27ce3e Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 12:28:04 +0000
Subject: [PATCH 08/43] final refactors

---
 dlai/01_diffusion_training.ipynb | 577 +++++++++++++++++++++++++++++--
 dlai/02_diffusion_sampling.ipynb | 139 ++++----
 2 files changed, 619 insertions(+), 97 deletions(-)

diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index 264e6272..9d3845e8 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -35,7 +35,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "b88f9513",
    "metadata": {},
    "outputs": [],
@@ -49,12 +49,12 @@
    "id": "7c0d229a",
    "metadata": {},
    "source": [
-    "# Setting Things Up"
+    "## Setting Things Up"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "id": "54c3a942",
    "metadata": {
     "tags": []
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "id": "a705d0a8",
    "metadata": {
     "tags": []
@@ -128,7 +128,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "id": "6bc9001e",
    "metadata": {
     "tags": []
@@ -141,7 +141,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "76c63b85",
    "metadata": {},
    "outputs": [
@@ -163,7 +163,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "eb13689d",
    "metadata": {},
    "outputs": [],
@@ -193,7 +193,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "8b0f5bed",
    "metadata": {},
    "outputs": [],
@@ -209,7 +209,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "id": "16085a65",
    "metadata": {},
    "outputs": [],
@@ -242,7 +242,7 @@
    "id": "d9ed46d7",
    "metadata": {},
    "source": [
-    "# Training"
+    "## Training"
    ]
   },
   {
@@ -256,7 +256,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "id": "d88afdba",
    "metadata": {
     "tags": []
@@ -271,9 +271,19 @@
     "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4]), 5).to(device=device).float()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "26765a7e-4ddc-449e-95c3-54c58a564738",
+   "metadata": {},
+   "source": [
+    "The following training cell takes a very long time to run on a CPU, so we have already trained the model for you on a GPU-equipped machine.\n",
+    "\n",
+    "### You can view the results of this [training run here](https://wandb.ai)"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "id": "f5f4af69",
    "metadata": {},
    "outputs": [
@@ -282,13 +292,13 @@
      "output_type": "stream",
      "text": [
       "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
      ]
     },
     {
      "data": {
       "text/html": [
-       "Tracking run with wandb version 0.15.4"
+       "Tracking run with wandb version 0.15.5"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -300,7 +310,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/Users/tcapelle/work/edu/dlai/wandb/run-20230705_183609-09mz5kur</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_120421-lqf74fua</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -312,7 +322,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/capecape/dlai_diffusion/runs/09mz5kur' target=\"_blank\">sparkling-frost-38</a></strong> to <a href='https://wandb.ai/capecape/dlai_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua' target=\"_blank\">daily-frost-1</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -324,7 +334,7 @@
     {
      "data": {
       "text/html": [
-       " View project at <a href='https://wandb.ai/capecape/dlai_diffusion' target=\"_blank\">https://wandb.ai/capecape/dlai_diffusion</a>"
+       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -336,7 +346,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/capecape/dlai_diffusion/runs/09mz5kur' target=\"_blank\">https://wandb.ai/capecape/dlai_diffusion/runs/09mz5kur</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -348,7 +358,21 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b4565f2a74df4f878429f373cacde283",
+       "model_id": "6aef0bb639f74bdf97d56b0c0cd1ffc5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/32 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
        "version_major": 2,
        "version_minor": 0
       },
@@ -362,7 +386,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "862fdffecb0c40508029ce43d2b9d22f",
+       "model_id": "",
        "version_major": 2,
        "version_minor": 0
       },
@@ -374,28 +398,499 @@
      "output_type": "display_data"
     },
     {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[10], line 23\u001b[0m\n\u001b[1;32m     21\u001b[0m loss \u001b[39m=\u001b[39m F\u001b[39m.\u001b[39mmse_loss(pred_noise, noise)\n\u001b[1;32m     22\u001b[0m loss\u001b[39m.\u001b[39mbackward()    \n\u001b[0;32m---> 23\u001b[0m optim\u001b[39m.\u001b[39;49mstep()\n\u001b[1;32m     25\u001b[0m \u001b[39m# we log the relevant metrics to the workspace\u001b[39;00m\n\u001b[1;32m     26\u001b[0m wandb\u001b[39m.\u001b[39mlog({\u001b[39m\"\u001b[39m\u001b[39mloss\u001b[39m\u001b[39m\"\u001b[39m: loss\u001b[39m.\u001b[39mitem(),\n\u001b[1;32m     27\u001b[0m            \u001b[39m\"\u001b[39m\u001b[39mlr\u001b[39m\u001b[39m\"\u001b[39m: optim\u001b[39m.\u001b[39mparam_groups[\u001b[39m0\u001b[39m][\u001b[39m'\u001b[39m\u001b[39mlr\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m     28\u001b[0m            \u001b[39m\"\u001b[39m\u001b[39mepoch\u001b[39m\u001b[39m\"\u001b[39m: ep})\n",
-      "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/optimizer.py:280\u001b[0m, in \u001b[0;36mOptimizer.profile_hook_step.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    276\u001b[0m         \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    277\u001b[0m             \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mfunc\u001b[39m}\u001b[39;00m\u001b[39m must return None or a tuple of (new_args, new_kwargs),\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    278\u001b[0m                                \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mbut got \u001b[39m\u001b[39m{\u001b[39;00mresult\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 280\u001b[0m out \u001b[39m=\u001b[39m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m    281\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_optimizer_step_code()\n\u001b[1;32m    283\u001b[0m \u001b[39m# call optimizer step post hooks\u001b[39;00m\n",
-      "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/optimizer.py:33\u001b[0m, in \u001b[0;36m_use_grad_for_differentiable.<locals>._use_grad\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m     31\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m     32\u001b[0m     torch\u001b[39m.\u001b[39mset_grad_enabled(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdefaults[\u001b[39m'\u001b[39m\u001b[39mdifferentiable\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[0;32m---> 33\u001b[0m     ret \u001b[39m=\u001b[39m func(\u001b[39mself\u001b[39;49m, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m     34\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m     35\u001b[0m     torch\u001b[39m.\u001b[39mset_grad_enabled(prev_grad)\n",
-      "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:141\u001b[0m, in \u001b[0;36mAdam.step\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m    130\u001b[0m     beta1, beta2 \u001b[39m=\u001b[39m group[\u001b[39m'\u001b[39m\u001b[39mbetas\u001b[39m\u001b[39m'\u001b[39m]\n\u001b[1;32m    132\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_init_group(\n\u001b[1;32m    133\u001b[0m         group,\n\u001b[1;32m    134\u001b[0m         params_with_grad,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    138\u001b[0m         max_exp_avg_sqs,\n\u001b[1;32m    139\u001b[0m         state_steps)\n\u001b[0;32m--> 141\u001b[0m     adam(\n\u001b[1;32m    142\u001b[0m         params_with_grad,\n\u001b[1;32m    143\u001b[0m         grads,\n\u001b[1;32m    144\u001b[0m         exp_avgs,\n\u001b[1;32m    145\u001b[0m         exp_avg_sqs,\n\u001b[1;32m    146\u001b[0m         max_exp_avg_sqs,\n\u001b[1;32m    147\u001b[0m         state_steps,\n\u001b[1;32m    148\u001b[0m         amsgrad\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mamsgrad\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    149\u001b[0m         beta1\u001b[39m=\u001b[39;49mbeta1,\n\u001b[1;32m    150\u001b[0m         beta2\u001b[39m=\u001b[39;49mbeta2,\n\u001b[1;32m    151\u001b[0m         lr\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mlr\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    152\u001b[0m         weight_decay\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mweight_decay\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    153\u001b[0m         eps\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39meps\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    154\u001b[0m         maximize\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mmaximize\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    155\u001b[0m         
foreach\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mforeach\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    156\u001b[0m         capturable\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mcapturable\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    157\u001b[0m         differentiable\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mdifferentiable\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    158\u001b[0m         fused\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mfused\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m    159\u001b[0m         grad_scale\u001b[39m=\u001b[39;49m\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mgrad_scale\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m    160\u001b[0m         found_inf\u001b[39m=\u001b[39;49m\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mfound_inf\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m    161\u001b[0m     )\n\u001b[1;32m    163\u001b[0m \u001b[39mreturn\u001b[39;00m loss\n",
-      "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:281\u001b[0m, in \u001b[0;36madam\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach, capturable, differentiable, fused, grad_scale, found_inf, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)\u001b[0m\n\u001b[1;32m    278\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m    279\u001b[0m     func \u001b[39m=\u001b[39m _single_tensor_adam\n\u001b[0;32m--> 281\u001b[0m func(params,\n\u001b[1;32m    282\u001b[0m      grads,\n\u001b[1;32m    283\u001b[0m      exp_avgs,\n\u001b[1;32m    284\u001b[0m      exp_avg_sqs,\n\u001b[1;32m    285\u001b[0m      max_exp_avg_sqs,\n\u001b[1;32m    286\u001b[0m      state_steps,\n\u001b[1;32m    287\u001b[0m      amsgrad\u001b[39m=\u001b[39;49mamsgrad,\n\u001b[1;32m    288\u001b[0m      beta1\u001b[39m=\u001b[39;49mbeta1,\n\u001b[1;32m    289\u001b[0m      beta2\u001b[39m=\u001b[39;49mbeta2,\n\u001b[1;32m    290\u001b[0m      lr\u001b[39m=\u001b[39;49mlr,\n\u001b[1;32m    291\u001b[0m      weight_decay\u001b[39m=\u001b[39;49mweight_decay,\n\u001b[1;32m    292\u001b[0m      eps\u001b[39m=\u001b[39;49meps,\n\u001b[1;32m    293\u001b[0m      maximize\u001b[39m=\u001b[39;49mmaximize,\n\u001b[1;32m    294\u001b[0m      capturable\u001b[39m=\u001b[39;49mcapturable,\n\u001b[1;32m    295\u001b[0m      differentiable\u001b[39m=\u001b[39;49mdifferentiable,\n\u001b[1;32m    296\u001b[0m      grad_scale\u001b[39m=\u001b[39;49mgrad_scale,\n\u001b[1;32m    297\u001b[0m      found_inf\u001b[39m=\u001b[39;49mfound_inf)\n",
-      "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:344\u001b[0m, in \u001b[0;36m_single_tensor_adam\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, grad_scale, found_inf, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize, capturable, differentiable)\u001b[0m\n\u001b[1;32m    341\u001b[0m     param \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mview_as_real(param)\n\u001b[1;32m    343\u001b[0m \u001b[39m# Decay the first and second moment running average coefficient\u001b[39;00m\n\u001b[0;32m--> 344\u001b[0m exp_avg\u001b[39m.\u001b[39;49mmul_(beta1)\u001b[39m.\u001b[39madd_(grad, alpha\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m beta1)\n\u001b[1;32m    345\u001b[0m exp_avg_sq\u001b[39m.\u001b[39mmul_(beta2)\u001b[39m.\u001b[39maddcmul_(grad, grad\u001b[39m.\u001b[39mconj(), value\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m beta2)\n\u001b[1;32m    347\u001b[0m \u001b[39mif\u001b[39;00m capturable \u001b[39mor\u001b[39;00m differentiable:\n",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-     ]
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/894 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c2bd6d2f891d4c69a4c285591b2bc1e4",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(Label(value='51.649 MB of 51.655 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=0.9998…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<style>\n",
+       "    table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
+       "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
+       "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
+       "    </style>\n",
+       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>epoch</td><td>▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████</td></tr><tr><td>loss</td><td>█▆▅▅▄▃▃▃▃▃▄▃▃▃▃▃▃▂▂▂▃▂▂▂▂▂▂▂▂▃▃▂▂▁▁▁▂▂▁▁</td></tr><tr><td>lr</td><td>████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>epoch</td><td>31</td></tr><tr><td>loss</td><td>0.09235</td></tr><tr><td>lr</td><td>3e-05</td></tr></table><br/></div></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run <strong style=\"color:#cdcd00\">daily-frost-1</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua</a><br/>Synced 6 W&B file(s), 288 media file(s), 9 artifact file(s) and 1 other file(s)"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Find logs at: <code>./wandb/run-20230706_120421-lqf74fua/logs</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     }
    ],
    "source": [
     "# same code as before, added comments on the extra W&B instrumentation lines\n",
     "# create a wandb run\n",
-    "run = wandb.init(project=\"dlai_diffusion\", job_type=\"train_conditional\", config=config)\n",
+    "run = wandb.init(project=\"sprite_diffusion\", job_type=\"train\", anonymous=\"allow\", config=config)\n",
     "\n",
-    "for ep in range(n_epoch):\n",
+    "for ep in tqdm(range(n_epoch), leave=True, total=n_epoch):\n",
     "    # set into train mode\n",
     "    nn_model.train()\n",
     "    optim.param_groups[0]['lr'] = lrate*(1-ep/n_epoch)\n",
@@ -440,6 +935,14 @@
     "# finish W&B run\n",
     "wandb.finish()"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "78ddaed3-8184-4161-a1d6-5af139b336d0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -458,7 +961,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.11.3"
   }
  },
  "nbformat": 4,
diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
index 0cc8f093..f831e4bb 100644
--- a/dlai/02_diffusion_sampling.ipynb
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 28,
    "id": "700e687c",
    "metadata": {
     "tags": []
@@ -29,6 +29,16 @@
     "import wandb"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "beab0dad-c211-4e3c-ab80-de52788f27e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# wandb.login() # uncomment if you want to login to wandb"
+   ]
+  },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -40,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 29,
    "id": "54c3a942",
    "metadata": {
     "tags": []
@@ -48,14 +58,14 @@
    "outputs": [],
    "source": [
     "# Wandb Params\n",
-    "project = \"debug_dlai\"\n",
-    "entity = \"capecape\"\n",
+    "PROJECT = \"sprite_diffusion\"\n",
+    "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:v0\" \n",
     "\n",
     "# ddpm sampler hyperparameters\n",
     "timesteps = 500\n",
     "beta1 = 1e-4\n",
     "beta2 = 0.02\n",
-    "num_samples = 32\n",
+    "num_samples = 30\n",
     "height = 16\n",
     "ddim_n = 25\n",
     "\n",
@@ -84,7 +94,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 30,
    "id": "8ab66255",
    "metadata": {},
    "outputs": [],
@@ -99,7 +109,8 @@
     "    producer_run = artifact.logged_by()\n",
     "\n",
     "    # load the weights dictionary\n",
-    "    model_weights = torch.load(model_path + f\"/context_model.pth\", map_location=\"cpu\")\n",
+    "    model_weights = torch.load(model_path + f\"/context_model.pth\", \n",
+    "                               map_location=\"cpu\")\n",
     "\n",
     "    # create the model\n",
     "    model = ContextUnet(in_channels=3, \n",
@@ -117,7 +128,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 31,
    "id": "b47633e2",
    "metadata": {},
    "outputs": [
@@ -130,7 +141,7 @@
     }
    ],
    "source": [
-    "nn_model = load_model('capecape/dlai_diffusion/w1r7jpji_context_model:v8')"
+    "nn_model = load_model(MODEL_ARTIFACT)"
    ]
   },
   {
@@ -153,7 +164,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 32,
    "id": "f6f479d1",
    "metadata": {},
    "outputs": [],
@@ -167,7 +178,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 33,
    "id": "8b0f5bed",
    "metadata": {},
    "outputs": [],
@@ -181,15 +192,23 @@
     "    return mean + noise"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "895b2bbc-46cd-4be8-a672-e84ef69bc329",
+   "metadata": {},
+   "source": [
+    "Sample with context using the standard DDPM algorithm.\n",
+    "We change the original algorithm so that it accepts a context\n",
+    "and a fixed noise tensor (`samples`)."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 34,
    "id": "16085a65",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# sample with context using standard algorithm\n",
-    "# we make a change to the original algorithm to allow for context and passing a noise tensor (samples)\n",
     "@torch.no_grad()\n",
     "def sample_ddpm_context(samples, context, save_rate=20):\n",
     "    # array to keep track of generated steps for plotting\n",
@@ -201,7 +220,7 @@
     "        # sample some random noise to inject back in. For i = 1, don't add back in noise\n",
     "        z = torch.randn_like(samples) if i > 1 else 0\n",
     "\n",
-    "        eps = nn_model(samples, t, c=context)    # predict noise e_(x_t,t, ctx)\n",
+    "        eps = nn_model(samples, t, c=context)    # predict noise\n",
     "        samples = denoise_add_noise(samples, i, eps, z)\n",
     "        if i % save_rate==0 or i==timesteps or i<8:\n",
     "            print(f'sampling timestep {i:3d}', end='\\r')\n",
@@ -222,7 +241,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 35,
    "id": "d88afdba",
    "metadata": {
     "tags": []
@@ -234,7 +253,12 @@
     "noises = torch.randn(num_samples, 3, height, height).to(device)  \n",
     "\n",
     "# A fixed context vector to sample from\n",
-    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4]), 5).to(device=device).float()"
+    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n",
+    "                                     1,1,1,1,1,1,\n",
+    "                                     2,2,2,2,2,2,\n",
+    "                                     3,3,3,3,3,3,\n",
+    "                                     4,4,4,4,4,4]), \n",
+    "                       5).to(device=device).float()"
    ]
   },
   {
@@ -248,7 +272,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 36,
    "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc",
    "metadata": {},
    "outputs": [],
@@ -267,7 +291,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 37,
    "id": "5fdfc048-47f0-43b5-983e-da715e1ed562",
    "metadata": {
     "tags": []
@@ -305,7 +329,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 38,
    "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2",
    "metadata": {},
    "outputs": [],
@@ -324,7 +348,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 39,
    "id": "89e24210-4885-4559-92e1-db10566ef5ea",
    "metadata": {},
    "outputs": [
@@ -351,7 +375,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 40,
    "id": "25b07c26-0ac2-428a-8351-34f8b7228074",
    "metadata": {},
    "outputs": [
@@ -369,14 +393,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 50,
    "id": "af33d3c4",
    "metadata": {},
    "outputs": [],
    "source": [
     "def ctx_to_classes(ctx_vector):\n",
     "    classes = \"hero,non-hero,food,spell,side-facing\".split(\",\")\n",
-    "    return [classes[i] for i in [ctx_vector[i].argmax().item() for i in range(ctx_vector.shape[0])]]"
+    "    return [classes[i] for i in ctx_vector.argmax(dim=1)]"
    ]
   },
   {
@@ -399,7 +423,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 42,
    "id": "481afea1-ae53-4b5b-a3db-1d49be0733a3",
    "metadata": {},
    "outputs": [],
@@ -422,24 +446,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 43,
    "id": "bbc7a2ca-ae05-4462-9ae3-82eb1a6dbc27",
-   "metadata": {
-    "scrolled": true
-   },
+   "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
-     ]
-    },
     {
      "data": {
       "text/html": [
-       "Tracking run with wandb version 0.15.4"
+       "Tracking run with wandb version 0.15.5"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -451,7 +465,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/Users/tcapelle/work/dlai/wandb_diffusion/wandb/run-20230704_142746-xfz2uh0q</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_122356-yw6l5eqs</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -463,7 +477,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q' target=\"_blank\">winter-feather-1</a></strong> to <a href='https://wandb.ai/capecape/debug_dlai' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs' target=\"_blank\">earthy-firebrand-4</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -475,7 +489,7 @@
     {
      "data": {
       "text/html": [
-       " View project at <a href='https://wandb.ai/capecape/debug_dlai' target=\"_blank\">https://wandb.ai/capecape/debug_dlai</a>"
+       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -487,7 +501,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q' target=\"_blank\">https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -511,7 +525,7 @@
     {
      "data": {
       "text/html": [
-       " View run <strong style=\"color:#cdcd00\">winter-feather-1</strong> at: <a href='https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q' target=\"_blank\">https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q</a><br/>Synced 6 W&B file(s), 1 media file(s), 97 artifact file(s) and 1 other file(s)"
+       " View run <strong style=\"color:#cdcd00\">earthy-firebrand-4</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs</a><br/>Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 1 other file(s)"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -523,7 +537,7 @@
     {
      "data": {
       "text/html": [
-       "Find logs at: <code>./wandb/run-20230704_142746-xfz2uh0q/logs</code>"
+       "Find logs at: <code>./wandb/run-20230706_122356-yw6l5eqs/logs</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -534,8 +548,8 @@
     }
    ],
    "source": [
-    "with wandb.init(project=project, entity=entity, job_type=\"samplers_battle\", config=config):\n",
-    "    wandb.log({\"samplers_tables\":table})"
+    "with wandb.init(project=PROJECT, job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n",
+    "    wandb.log({\"samplers_table\":table})"
    ]
   },
   {
@@ -549,7 +563,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 44,
    "id": "68987e12-22d1-4c40-b0d2-b33f6397c34a",
    "metadata": {},
    "outputs": [
@@ -579,17 +593,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 45,
    "id": "626ef616-dae4-4417-9219-d67ef0794e63",
    "metadata": {},
    "outputs": [],
    "source": [
-    "hero_table = wandb.Table(columns=[\"generation\", \"hero\", \"non-hero\", \"food\", \"spell\", \"side-facing\"])"
+    "hero_table = wandb.Table(columns=[\"generation\", \n",
+    "                                  \"hero\", \n",
+    "                                  \"non-hero\", \n",
+    "                                  \"food\", \n",
+    "                                  \"spell\", \n",
+    "                                  \"side-facing\"])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 46,
    "id": "dbeb2d18-feb9-4452-b368-3f7f03b1715d",
    "metadata": {},
    "outputs": [],
@@ -600,7 +619,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 47,
    "id": "8e196f2b-4d13-4cc2-a380-2f23530bee1c",
    "metadata": {
     "scrolled": true
@@ -609,7 +628,7 @@
     {
      "data": {
       "text/html": [
-       "Tracking run with wandb version 0.15.4"
+       "Tracking run with wandb version 0.15.5"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -621,7 +640,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/Users/tcapelle/work/dlai/wandb_diffusion/wandb/run-20230704_142806-sv5fvps1</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_122412-mlom51n4</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -633,7 +652,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1' target=\"_blank\">summer-spaceship-2</a></strong> to <a href='https://wandb.ai/capecape/debug_dlai' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4' target=\"_blank\">avid-haze-5</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -645,7 +664,7 @@
     {
      "data": {
       "text/html": [
-       " View project at <a href='https://wandb.ai/capecape/debug_dlai' target=\"_blank\">https://wandb.ai/capecape/debug_dlai</a>"
+       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -657,7 +676,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1' target=\"_blank\">https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -681,7 +700,7 @@
     {
      "data": {
       "text/html": [
-       " View run <strong style=\"color:#cdcd00\">summer-spaceship-2</strong> at: <a href='https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1' target=\"_blank\">https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1</a><br/>Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)"
+       " View run <strong style=\"color:#cdcd00\">avid-haze-5</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4</a><br/>Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -693,7 +712,7 @@
     {
      "data": {
       "text/html": [
-       "Find logs at: <code>./wandb/run-20230704_142806-sv5fvps1/logs</code>"
+       "Find logs at: <code>./wandb/run-20230706_122412-mlom51n4/logs</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -704,7 +723,7 @@
     }
    ],
    "source": [
-    "with wandb.init(project=project, entity=entity, job_type=\"sampling_mix\", config=config):\n",
+    "with wandb.init(project=PROJECT, job_type=\"sampling_mix\", config=config):\n",
     "    wandb.log({\"hero_table\":hero_table})"
    ]
   }
@@ -725,7 +744,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.11.3"
   }
  },
  "nbformat": 4,

From c7f91ad8e91cb5c14ba8ccebe2ded1fdd13f6295 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 12:36:31 +0000
Subject: [PATCH 09/43] increase slice size to 10k

---
 dlai/00_intro.ipynb | 162 +++++++++++++++++++-------------------------
 1 file changed, 69 insertions(+), 93 deletions(-)

diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb
index 1cc2ecb6..d858d6ce 100644
--- a/dlai/00_intro.ipynb
+++ b/dlai/00_intro.ipynb
@@ -8,6 +8,7 @@
    "outputs": [],
    "source": [
     "import math\n",
+    "from pathlib import Path\n",
     "from types import SimpleNamespace\n",
     "\n",
     "import wandb\n",
@@ -24,11 +25,20 @@
   {
    "cell_type": "code",
    "execution_count": 2,
+   "id": "803c37e2-7ff5-46a6-afb7-b80cb69f7501",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# wandb.login() # uncomment if you want to login to wandb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
    "id": "d51a9f7f",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Constants\n",
     "INPUT_SIZE = 3 * 16 * 16\n",
     "OUTPUT_SIZE = 5\n",
     "HIDDEN_SIZE = 256\n",
@@ -37,12 +47,12 @@
     "\n",
     "# Device\n",
     "device = torch.device(\"cuda\" if torch.cuda.is_available()  else \"cpu\")\n",
-    "device = torch.device(\"mps\" if torch.backends.mps.is_built() else \"cpu\")\n",
     "\n",
-    "data_dir = './data/'\n",
+    "data_dir = Path('./data/')\n",
     "\n",
     "def get_dataloaders(batch_size, slice_size=None, valid_pct=0.2):\n",
-    "    dataset = CustomDataset.from_np(data_dir + \"sprites_1788_16x16.npy\", data_dir + \"sprite_labels_nc_1788_16x16.npy\", argmax=True)\n",
+    "    dataset = CustomDataset.from_np(data_dir/\"sprites_1788_16x16.npy\", \n",
+    "                                    data_dir/\"sprite_labels_nc_1788_16x16.npy\", argmax=True)\n",
     "\n",
     "    if slice_size:\n",
     "        dataset = dataset.subset(slice_size)\n",
@@ -67,28 +77,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "id": "8700b5fa",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "sprite shape: (89400, 16, 16, 3)\n",
-      "labels shape: (89400,)\n",
-      "sprite shape: (1000, 16, 16, 3)\n",
-      "labels shape: (1000,)\n"
-     ]
-    }
-   ],
-   "source": [
-    "train_dl, valid_dl = get_dataloaders(128, slice_size=1000)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 4,
    "id": "8401cf96",
    "metadata": {},
    "outputs": [],
@@ -130,7 +119,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 5,
    "id": "e5492ebb-2dfa-44ce-af6c-24655e45a2ed",
    "metadata": {},
    "outputs": [],
@@ -138,8 +127,9 @@
     "def train_model(config):\n",
     "    \"Train a model with a given config\"\n",
     "    wandb.init(\n",
-    "        project=\"deeplearningai-intro\",\n",
-    "        config=config\n",
+    "        project=\"intro\",\n",
+    "        config=config,\n",
+    "        anonymous=\"allow\",\n",
     "    )\n",
     "\n",
     "    # Get the data\n",
@@ -195,7 +185,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 6,
    "id": "f33f739c-d7ef-4954-ae87-d5bdd6bf25ee",
    "metadata": {},
    "outputs": [],
@@ -205,21 +195,29 @@
     "    batch_size = 128,\n",
     "    lr = 1e-3,\n",
     "    dropout = 0.1,\n",
-    "    slice_size = 1000,\n",
+    "    slice_size = 10_000,\n",
     "    valid_pct = 0.2,\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 7,
    "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6",
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
-       "Tracking run with wandb version 0.15.4"
+       "Tracking run with wandb version 0.15.5"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -231,7 +229,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/Users/tcapelle/work/dlai/wandb/run-20230705_144549-dg3tar8b</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_123525-7s54fntl</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -243,7 +241,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b' target=\"_blank\">morning-jazz-7</a></strong> to <a href='https://wandb.ai/capecape/deeplearningai-intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl' target=\"_blank\">clear-sound-7</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -255,7 +253,7 @@
     {
      "data": {
       "text/html": [
-       " View project at <a href='https://wandb.ai/capecape/deeplearningai-intro' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro</a>"
+       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -267,7 +265,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -282,14 +280,14 @@
      "text": [
       "sprite shape: (89400, 16, 16, 3)\n",
       "labels shape: (89400,)\n",
-      "sprite shape: (1000, 16, 16, 3)\n",
-      "labels shape: (1000,)\n"
+      "sprite shape: (10000, 16, 16, 3)\n",
+      "labels shape: (10000,)\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c2a535bd5a0d481e9916ab5f71edbf41",
+       "model_id": "e77b2cb6e7094dec858054a0ef68c5d3",
        "version_major": 2,
        "version_minor": 0
       },
@@ -312,20 +310,6 @@
      "metadata": {},
      "output_type": "display_data"
     },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "70d7c9e0e2fb4edd969b284271d72c6f",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "VBox(children=(Label(value='0.161 MB of 0.161 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "data": {
       "text/html": [
@@ -334,7 +318,7 @@
        "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
        "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
        "    </style>\n",
-       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇▇██</td></tr><tr><td>train/example_ct</td><td>▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇███</td></tr><tr><td>train/train_loss</td><td>█▆▅▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁▆█</td></tr><tr><td>val/val_loss</td><td>█▃▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>2400</td></tr><tr><td>train/train_loss</td><td>0.21717</td></tr><tr><td>val/val_accuracy</td><td>0.92</td></tr><tr><td>val/val_loss</td><td>0.32078</td></tr></table><br/></div></div>"
+       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███</td></tr><tr><td>train/example_ct</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███</td></tr><tr><td>train/train_loss</td><td>█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁▅█</td></tr><tr><td>val/val_loss</td><td>█▃▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>24000</td></tr><tr><td>train/train_loss</td><td>0.0144</td></tr><tr><td>val/val_accuracy</td><td>1.0</td></tr><tr><td>val/val_loss</td><td>0.00669</td></tr></table><br/></div></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -346,7 +330,7 @@
     {
      "data": {
       "text/html": [
-       " View run <strong style=\"color:#cdcd00\">morning-jazz-7</strong> at: <a href='https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b</a><br/>Synced 7 W&B file(s), 1 media file(s), 126 artifact file(s) and 2 other file(s)"
+       " View run <strong style=\"color:#cdcd00\">clear-sound-7</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl</a><br/>Synced 7 W&B file(s), 1 media file(s), 127 artifact file(s) and 2 other file(s)"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -358,7 +342,7 @@
     {
      "data": {
       "text/html": [
-       "Find logs at: <code>./wandb/run-20230705_144549-dg3tar8b/logs</code>"
+       "Find logs at: <code>./wandb/run-20230706_123525-7s54fntl/logs</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -383,14 +367,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 8,
    "id": "4f40520a-66f8-4415-9e36-174dda06aca0",
    "metadata": {},
    "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "8acd0e8533e84ae88b608e748a904422",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670229483376413, max=1.0…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "data": {
       "text/html": [
-       "Tracking run with wandb version 0.15.4"
+       "Tracking run with wandb version 0.15.5"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -402,7 +400,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/Users/tcapelle/work/dlai/wandb/run-20230705_144416-iysb84lz</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_123542-lbawks79</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -414,7 +412,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz' target=\"_blank\">icy-water-6</a></strong> to <a href='https://wandb.ai/capecape/deeplearningai-intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79' target=\"_blank\">fearless-bird-8</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -426,7 +424,7 @@
     {
      "data": {
       "text/html": [
-       " View project at <a href='https://wandb.ai/capecape/deeplearningai-intro' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro</a>"
+       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -438,7 +436,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -453,14 +451,14 @@
      "text": [
       "sprite shape: (89400, 16, 16, 3)\n",
       "labels shape: (89400,)\n",
-      "sprite shape: (1000, 16, 16, 3)\n",
-      "labels shape: (1000,)\n"
+      "sprite shape: (10000, 16, 16, 3)\n",
+      "labels shape: (10000,)\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e576d9f253b6400d80f8022dafbdd326",
+       "model_id": "e8b441fe3ba44768b4b2215b68a437b6",
        "version_major": 2,
        "version_minor": 0
       },
@@ -483,20 +481,6 @@
      "metadata": {},
      "output_type": "display_data"
     },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "7bd6b3a334c94ce58afd7c25e10d8d5a",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "VBox(children=(Label(value='0.161 MB of 0.161 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "data": {
       "text/html": [
@@ -505,7 +489,7 @@
        "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
        "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
        "    </style>\n",
-       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇▇██</td></tr><tr><td>train/example_ct</td><td>▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇███</td></tr><tr><td>train/train_loss</td><td>█▆▅▄▄▃▃▂▂▂▂▁▂▂▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁██</td></tr><tr><td>val/val_loss</td><td>█▂▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>2400</td></tr><tr><td>train/train_loss</td><td>0.27209</td></tr><tr><td>val/val_accuracy</td><td>0.92</td></tr><tr><td>val/val_loss</td><td>0.32183</td></tr></table><br/></div></div>"
+       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███</td></tr><tr><td>train/example_ct</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███</td></tr><tr><td>train/train_loss</td><td>█▅▃▃▃▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁██</td></tr><tr><td>val/val_loss</td><td>█▂▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>24000</td></tr><tr><td>train/train_loss</td><td>0.02836</td></tr><tr><td>val/val_accuracy</td><td>1.0</td></tr><tr><td>val/val_loss</td><td>0.00944</td></tr></table><br/></div></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -517,7 +501,7 @@
     {
      "data": {
       "text/html": [
-       " View run <strong style=\"color:#cdcd00\">icy-water-6</strong> at: <a href='https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz' target=\"_blank\">https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz</a><br/>Synced 7 W&B file(s), 1 media file(s), 126 artifact file(s) and 2 other file(s)"
+       " View run <strong style=\"color:#cdcd00\">fearless-bird-8</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79</a><br/>Synced 7 W&B file(s), 1 media file(s), 124 artifact file(s) and 2 other file(s)"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -529,7 +513,7 @@
     {
      "data": {
       "text/html": [
-       "Find logs at: <code>./wandb/run-20230705_144416-iysb84lz/logs</code>"
+       "Find logs at: <code>./wandb/run-20230706_123542-lbawks79/logs</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -543,19 +527,11 @@
     "config.dropout = 0.5\n",
     "train_model(config)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bba77c03-fcd2-43ef-9a11-8cebef617c23",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -569,7 +545,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.11.3"
   }
  },
  "nbformat": 4,

From e3ff1ff7cc9d1b5f59656503fd212170a602d150 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 12:38:14 +0000
Subject: [PATCH 10/43] use pathlib

---
 dlai/02_diffusion_sampling.ipynb | 73 ++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 32 deletions(-)

diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
index f831e4bb..96e13a72 100644
--- a/dlai/02_diffusion_sampling.ipynb
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -14,13 +14,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 1,
    "id": "700e687c",
    "metadata": {
     "tags": []
    },
    "outputs": [],
    "source": [
+    "from pathlib import Path\n",
     "import torch\n",
     "import torch.nn.functional as F\n",
     "import numpy as np\n",
@@ -50,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 3,
    "id": "54c3a942",
    "metadata": {
     "tags": []
@@ -94,7 +95,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 4,
    "id": "8ab66255",
    "metadata": {},
    "outputs": [],
@@ -103,13 +104,13 @@
     "    \"Load the model from wandb artifacts\"\n",
     "    api = wandb.Api()\n",
     "    artifact = api.artifact(model_artifact_name, type=\"model\")\n",
-    "    model_path = artifact.download()\n",
+    "    model_path = Path(artifact.download())\n",
     "\n",
     "    # recover model info from the registry\n",
     "    producer_run = artifact.logged_by()\n",
     "\n",
     "    # load the weights dictionary\n",
-    "    model_weights = torch.load(model_path + f\"/context_model.pth\", \n",
+    "    model_weights = torch.load(model_path/\"context_model.pth\", \n",
     "                               map_location=\"cpu\")\n",
     "\n",
     "    # create the model\n",
@@ -128,7 +129,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 5,
    "id": "b47633e2",
    "metadata": {},
    "outputs": [
@@ -164,7 +165,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 6,
    "id": "f6f479d1",
    "metadata": {},
    "outputs": [],
@@ -178,7 +179,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 7,
    "id": "8b0f5bed",
    "metadata": {},
    "outputs": [],
@@ -204,7 +205,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 8,
    "id": "16085a65",
    "metadata": {},
    "outputs": [],
@@ -241,7 +242,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 9,
    "id": "d88afdba",
    "metadata": {
     "tags": []
@@ -272,7 +273,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 10,
    "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc",
    "metadata": {},
    "outputs": [],
@@ -291,7 +292,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 11,
    "id": "5fdfc048-47f0-43b5-983e-da715e1ed562",
    "metadata": {
     "tags": []
@@ -329,7 +330,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 12,
    "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2",
    "metadata": {},
    "outputs": [],
@@ -348,7 +349,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 13,
    "id": "89e24210-4885-4559-92e1-db10566ef5ea",
    "metadata": {},
    "outputs": [
@@ -375,7 +376,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 14,
    "id": "25b07c26-0ac2-428a-8351-34f8b7228074",
    "metadata": {},
    "outputs": [
@@ -393,7 +394,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 15,
    "id": "af33d3c4",
    "metadata": {},
    "outputs": [],
@@ -423,7 +424,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 16,
    "id": "481afea1-ae53-4b5b-a3db-1d49be0733a3",
    "metadata": {},
    "outputs": [],
@@ -446,10 +447,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 17,
    "id": "bbc7a2ca-ae05-4462-9ae3-82eb1a6dbc27",
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+     ]
+    },
     {
      "data": {
       "text/html": [
@@ -465,7 +474,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_122356-yw6l5eqs</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_123733-eztcx1w7</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -477,7 +486,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs' target=\"_blank\">earthy-firebrand-4</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7' target=\"_blank\">fresh-frost-7</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -501,7 +510,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -525,7 +534,7 @@
     {
      "data": {
       "text/html": [
-       " View run <strong style=\"color:#cdcd00\">earthy-firebrand-4</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs</a><br/>Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 1 other file(s)"
+       " View run <strong style=\"color:#cdcd00\">fresh-frost-7</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7</a><br/>Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 2 other file(s)"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -537,7 +546,7 @@
     {
      "data": {
       "text/html": [
-       "Find logs at: <code>./wandb/run-20230706_122356-yw6l5eqs/logs</code>"
+       "Find logs at: <code>./wandb/run-20230706_123733-eztcx1w7/logs</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -563,7 +572,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 18,
    "id": "68987e12-22d1-4c40-b0d2-b33f6397c34a",
    "metadata": {},
    "outputs": [
@@ -593,7 +602,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 19,
    "id": "626ef616-dae4-4417-9219-d67ef0794e63",
    "metadata": {},
    "outputs": [],
@@ -608,7 +617,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 20,
    "id": "dbeb2d18-feb9-4452-b368-3f7f03b1715d",
    "metadata": {},
    "outputs": [],
@@ -619,7 +628,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 21,
    "id": "8e196f2b-4d13-4cc2-a380-2f23530bee1c",
    "metadata": {
     "scrolled": true
@@ -640,7 +649,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_122412-mlom51n4</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_123747-yk6jzo2x</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -652,7 +661,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4' target=\"_blank\">avid-haze-5</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x' target=\"_blank\">misty-field-8</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -676,7 +685,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -700,7 +709,7 @@
     {
      "data": {
       "text/html": [
-       " View run <strong style=\"color:#cdcd00\">avid-haze-5</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4</a><br/>Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)"
+       " View run <strong style=\"color:#cdcd00\">misty-field-8</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x</a><br/>Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 2 other file(s)"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -712,7 +721,7 @@
     {
      "data": {
       "text/html": [
-       "Find logs at: <code>./wandb/run-20230706_122412-mlom51n4/logs</code>"
+       "Find logs at: <code>./wandb/run-20230706_123747-yk6jzo2x/logs</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"

From 964e9ea2aba50d1c0ee10c107831941094c6076c Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 13:11:09 +0000
Subject: [PATCH 11/43] 33M model, reasonable results

---
 dlai/04_train_llm.ipynb | 396 ++++++++++++++++++++++++----------------
 1 file changed, 239 insertions(+), 157 deletions(-)

diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb
index ae128ebc..3fbef4e5 100644
--- a/dlai/04_train_llm.ipynb
+++ b/dlai/04_train_llm.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 1,
    "id": "fe0ed738-872b-4952-baf8-b3f7c92214a0",
    "metadata": {},
    "outputs": [],
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 2,
    "id": "f64e095c-d28b-44aa-a122-4121c5c66a5b",
    "metadata": {},
    "outputs": [],
@@ -35,27 +35,93 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "a7535b8b-d220-44e8-a56c-97e250c36596",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Found cached dataset parquet (/home/tcapelle/.cache/huggingface/datasets/MohamedRashad___parquet/MohamedRashad--characters_backstories-6398ba4bb1a6e421/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "899f1c4acc1a40d19459e9323bc75960",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "ds = load_dataset('MohamedRashad/characters_backstories')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "78879ff2-7eca-4b57-83f8-00b203f9e65d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "DatasetDict({\n",
+       "    train: Dataset({\n",
+       "        features: ['text', 'target'],\n",
+       "        num_rows: 2322\n",
+       "    })\n",
+       "})"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6a58c980-50ce-4d57-8d58-13a4bbda7249",
+   "metadata": {},
+   "source": [
+    "As this dataset has no validation split, we will create one:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "7dae9106-8015-43da-a6d9-1124dee4bdde",
+   "metadata": {},
    "outputs": [],
    "source": [
-    "ds = load_dataset('wikitext', 'wikitext-2-raw-v1')"
+    "ds = ds[\"train\"].train_test_split(test_size=0.2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 6,
    "id": "2286ae41-213d-480d-a4ba-8c4e2e1c4771",
    "metadata": {},
    "outputs": [],
    "source": [
-    "model_checkpoint = \"roneneldan/TinyStories-1M\"  # distilgpt2"
+    "model_checkpoint = \"roneneldan/TinyStories-33M\"  # distilgpt2"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 7,
    "id": "26dfa0b7-8d9f-44f3-9e09-bc12bcb5ae0b",
    "metadata": {},
    "outputs": [],
@@ -73,13 +139,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 8,
    "id": "0ea05869-8ece-4a82-b9d4-3a62a84b6a77",
    "metadata": {},
    "outputs": [],
    "source": [
     "def tokenize_function(examples):\n",
-    "    return tokenizer(examples[\"text\"])"
+    "    return tokenizer(examples[\"target\"])"
    ]
   },
   {
@@ -92,24 +158,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
-   "id": "22368c91-ddf8-4b08-848e-f732ff155494",
+   "execution_count": 9,
+   "id": "2ce65c5f-8227-4c41-9e96-dfc80de611be",
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
       "text/plain": [
-       "Map (num_proc=4):   0%|          | 0/4358 [00:00<?, ? examples/s]"
+       "{'text': 'Generate Backstory based on following information\\nCharacter Name: Vaskir Tempus\\nCharacter Race: Yuan-ti abomination\\nCharacter Class: Paladin of treachery/ goo bladelock\\n\\nOutput:\\n',\n",
+       " 'target': 'Vaskir is an exiled yuan ti who forsook his religion to worship the great old one dendar. he is a master swordsman who wields a greatsword in tandem with a longsword, effectively dualwielding the huge blade. He is chaotic evil, believing that government and law holds back all of humanity from their goals, keeping them oppressed and subjugated under the foot of the highest ruler'}"
       ]
      },
+     "execution_count": 9,
      "metadata": {},
-     "output_type": "display_data"
-    },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds[\"train\"][232]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "44c0f6c6-8b87-4dc7-b138-f3c9d6cac163",
+   "metadata": {},
+   "source": [
+    "We want to grab the characters' backstories from the `target` column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "22368c91-ddf8-4b08-848e-f732ff155494",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
@@ -118,12 +202,22 @@
        "version_minor": 0
       },
       "text/plain": [
-       "Map (num_proc=4):   0%|          | 0/36718 [00:00<?, ? examples/s]"
+       "Map (num_proc=4):   0%|          | 0/1857 [00:00<?, ? examples/s]"
       ]
      },
      "metadata": {},
      "output_type": "display_data"
     },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (2264 > 2048). Running this sequence through the model will result in indexing errors\n",
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (2812 > 2048). Running this sequence through the model will result in indexing errors\n",
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (2573 > 2048). Running this sequence through the model will result in indexing errors\n",
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (2952 > 2048). Running this sequence through the model will result in indexing errors\n"
+     ]
+    },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
@@ -132,60 +226,61 @@
        "version_minor": 0
       },
       "text/plain": [
-       "Map (num_proc=4):   0%|          | 0/3760 [00:00<?, ? examples/s]"
+       "Map (num_proc=4):   0%|          | 0/465 [00:00<?, ? examples/s]"
       ]
      },
      "metadata": {},
      "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (2661 > 2048). Running this sequence through the model will result in indexing errors\n",
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (4725 > 2048). Running this sequence through the model will result in indexing errors\n",
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (2464 > 2048). Running this sequence through the model will result in indexing errors\n",
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (3121 > 2048). Running this sequence through the model will result in indexing errors\n"
+     ]
     }
    ],
    "source": [
-    "tokenized_datasets = ds.map(tokenize_function, batched=True, num_proc=4, remove_columns=[\"text\"])"
+    "tokenized_datasets = ds.map(tokenize_function, batched=True, num_proc=4, remove_columns=[\"text\", \"target\"])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "f5880220-3b8b-414e-9e9a-6e6541784417",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 11,
    "id": "d59cc8a9-5f87-4eb7-abbc-f4fc18fea51d",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "{'input_ids': [796, 569, 18354, 7496, 17740, 6711, 796, 220, 198],\n",
-       " 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}"
+       "[13]"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "tokenized_datasets[\"train\"][1]"
+    "tokenized_datasets[\"train\"][1][\"input_ids\"][0:10]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 12,
    "id": "70bc12ae-52dc-47ad-b9ef-1e5b8af829e8",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "' = Valkyria Chronicles III = \\n'"
+       "'.'"
       ]
      },
-     "execution_count": 43,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -196,19 +291,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 13,
    "id": "85c6ad00-3825-4f12-be49-8ff336d5d398",
    "metadata": {},
    "outputs": [],
    "source": [
-    "block_size = 128\n",
+    "block_size = 256\n",
     "\n",
     "def group_texts(examples):\n",
-    "    # Concatenate all texts.\n",
     "    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}\n",
     "    total_length = len(concatenated_examples[list(examples.keys())[0]])\n",
-    "    # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can\n",
-    "        # customize this part to your needs.\n",
     "    total_length = (total_length // block_size) * block_size\n",
     "    # Split by chunks of max_len.\n",
     "    result = {\n",
@@ -231,7 +323,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 14,
    "id": "572f29c8-84d3-45b9-b8df-26de8c22bc91",
    "metadata": {},
    "outputs": [
@@ -243,21 +335,7 @@
        "version_minor": 0
       },
       "text/plain": [
-       "Map (num_proc=4):   0%|          | 0/4358 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map (num_proc=4):   0%|          | 0/36718 [00:00<?, ? examples/s]"
+       "Map (num_proc=4):   0%|          | 0/1857 [00:00<?, ? examples/s]"
       ]
      },
      "metadata": {},
@@ -271,7 +349,7 @@
        "version_minor": 0
       },
       "text/plain": [
-       "Map (num_proc=4):   0%|          | 0/3760 [00:00<?, ? examples/s]"
+       "Map (num_proc=4):   0%|          | 0/465 [00:00<?, ? examples/s]"
       ]
      },
      "metadata": {},
@@ -289,92 +367,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
-   "id": "a9e1a750-f115-46a8-b8fb-a4cb5d27d954",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "' game and follows the \" Nameless \", a penal military unit serving the nation of Gallia during the Second Europan War who perform secret black operations and are pitted against the Imperial unit \" Calamaty Raven \". \\n The game began development in 2010, carrying over a large portion of the work done on Valkyria Chronicles II. While it retained the standard features of the series, it also underwent multiple adjustments, such as making the game more forgiving for series newcomers. Character designer Raita Honjou and composer Hitoshi Sakimoto both returned from previous entries, along with Valkyria Chronicles II director Takeshi Oz'"
-      ]
-     },
-     "execution_count": 26,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.decode(lm_datasets[\"train\"][1][\"input_ids\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 15,
    "id": "b4f131eb-979e-40f6-9e28-19756beaa8e4",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "183ccb9277844038a6aa86466d58ac1c",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading (…)lve/main/config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ba4f041c71ca467482a4137c78f882c7",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading pytorch_model.bin:   0%|          | 0.00/48.6M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "model = AutoModelForCausalLM.from_pretrained(model_checkpoint)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
-   "id": "b99d656a-2ea8-4aca-8809-0d6c9a4aa15d",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 16,
    "id": "7345ab23-8d12-4d4c-a39d-bb2202bff218",
    "metadata": {},
    "outputs": [],
    "source": [
-    "os.environ[\"WANDB_PROJECT\"] = \"tiny-stories\""
+    "os.environ[\"WANDB_PROJECT\"] = \"tiny-stories-characters\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 17,
    "id": "d74ee155-3c30-4ef2-9c4d-fd8ee222c50c",
    "metadata": {},
    "outputs": [],
    "source": [
     "model_name = model_checkpoint.split(\"/\")[-1]\n",
     "training_args = TrainingArguments(\n",
-    "    f\"{model_name}-finetuned-wikitext2\",\n",
+    "    f\"{model_name}-finetuned-characters-backstories\",\n",
     "    report_to=\"wandb\",\n",
     "    logging_steps=1,\n",
     "    evaluation_strategy = \"epoch\",\n",
@@ -385,7 +405,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 18,
    "id": "af62105f-a478-436f-88a2-5c1d78b9d20a",
    "metadata": {},
    "outputs": [],
@@ -394,13 +414,13 @@
     "    model=model,\n",
     "    args=training_args,\n",
     "    train_dataset=lm_datasets[\"train\"],\n",
-    "    eval_dataset=lm_datasets[\"validation\"],\n",
+    "    eval_dataset=lm_datasets[\"test\"],\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 19,
    "id": "01958a56-c22a-4a27-bc71-41c59fc97f05",
    "metadata": {},
    "outputs": [
@@ -409,7 +429,7 @@
      "output_type": "stream",
      "text": [
       "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33manony-moose-57595088200203951\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
      ]
     },
     {
@@ -427,19 +447,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_104512-3u6izcp0</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/anony-moose-57595088200203951/tiny-stories/runs/3u6izcp0?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">fallen-voice-1</a></strong> to <a href='https://wandb.ai/anony-moose-57595088200203951/tiny-stories?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_130647-l0pa7ivo</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -451,7 +459,7 @@
     {
      "data": {
       "text/html": [
-       " View project at <a href='https://wandb.ai/anony-moose-57595088200203951/tiny-stories?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/tiny-stories?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters/runs/l0pa7ivo' target=\"_blank\">dulcet-cherry-3</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -463,7 +471,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/anony-moose-57595088200203951/tiny-stories/runs/3u6izcp0?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2' target=\"_blank\">https://wandb.ai/anony-moose-57595088200203951/tiny-stories/runs/3u6izcp0?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2</a>"
+       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -475,7 +483,7 @@
     {
      "data": {
       "text/html": [
-       "Do NOT share these links with anyone. They can be used to claim your runs."
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters/runs/l0pa7ivo' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters/runs/l0pa7ivo</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -490,8 +498,8 @@
        "\n",
        "    <div>\n",
        "      \n",
-       "      <progress value='7002' max='7002' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-       "      [7002/7002 03:51, Epoch 3/3]\n",
+       "      <progress value='816' max='816' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [816/816 01:26, Epoch 3/3]\n",
        "    </div>\n",
        "    <table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
@@ -504,18 +512,18 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <td>1</td>\n",
-       "      <td>6.471900</td>\n",
-       "      <td>6.374047</td>\n",
+       "      <td>5.795800</td>\n",
+       "      <td>5.142995</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td>2</td>\n",
-       "      <td>5.812100</td>\n",
-       "      <td>6.196322</td>\n",
+       "      <td>4.887100</td>\n",
+       "      <td>5.009582</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <td>3</td>\n",
-       "      <td>5.659900</td>\n",
-       "      <td>6.153089</td>\n",
+       "      <td>4.886900</td>\n",
+       "      <td>4.998519</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table><p>"
@@ -530,10 +538,10 @@
     {
      "data": {
       "text/plain": [
-       "TrainOutput(global_step=7002, training_loss=6.346599991952852, metrics={'train_runtime': 237.6024, 'train_samples_per_second': 235.679, 'train_steps_per_second': 29.469, 'total_flos': 17136527671296.0, 'train_loss': 6.346599991952852, 'epoch': 3.0})"
+       "TrainOutput(global_step=816, training_loss=5.0707503650702686, metrics={'train_runtime': 92.5014, 'train_samples_per_second': 70.572, 'train_steps_per_second': 8.821, 'total_flos': 284203589566464.0, 'train_loss': 5.0707503650702686, 'epoch': 3.0})"
       ]
      },
-     "execution_count": 32,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -542,13 +550,87 @@
     "trainer.train()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "0a247e01-35d0-454f-8b7e-5f24cdf66f33",
+   "metadata": {},
+   "source": [
+    "## Generate"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "36f0eef0-8729-4d32-a2aa-b1577847f7f5",
+   "execution_count": 21,
+   "id": "6f16d43d-445f-4df5-8734-85584f95792f",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "model = trainer.model\n",
+    "device = next(model.parameters()).device"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "7911e43f-f4ce-4855-9f68-662438af8d24",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prompt = \"The hero was half human and cat, his strenghts were\"\n",
+    "\n",
+    "input_ids = tokenizer.encode(prompt, return_tensors=\"pt\").to(device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "844802b9-0ffc-466e-bedb-d7b7c6f337de",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[  464,  4293,   373,  2063,  1692,   290,  3797,    11,   465, 43071,\n",
+       "           456,   912,   547]], device='cuda:0')"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "input_ids"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "e0883650-ab62-49c9-88d8-7f8c4fdfb0a9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
+      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The hero was half human and cat, his strenghts were the first to be. He was the only one who had a lot of power, and he was the only one who had a lot of power. He was a great wizard, and he was the only one who could do it. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great\n"
+     ]
+    }
+   ],
+   "source": [
+    "output = model.generate(input_ids, max_length = 128, num_beams=1)\n",
+    "output_text = tokenizer.decode(output[0], skip_special_tokens=True)\n",
+    "\n",
+    "print(output_text)"
+   ]
   }
  ],
  "metadata": {

From 1ee868344fad8344c20404bb646def32d51d43bb Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 13:12:15 +0000
Subject: [PATCH 12/43] update readme

---
 dlai/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dlai/README.md b/dlai/README.md
index a20f4a8d..13db46a9 100644
--- a/dlai/README.md
+++ b/dlai/README.md
@@ -8,6 +8,6 @@ We instrument various notebooks from the generative AI course with W&B to track
 - [01_diffusion_training](01_diffusion_training.ipynb) In this notebook we train a diffusion model to generate images from the Sprites dataset. We log the training metrics to W&B. We sample from the model and log the images to W&B.
 - [02_diffusion_sampling](02_diffusion_sampling.ipynb) In this notebook we sample from the trained model and log the images to W&B. We compare different sampling methods and log the results.
 - [03 LLM evaluation and debugging](03_llm_eval.ipynb) In this notebook we generate character names using LLMs and use W&B autologgers and Tracer to evaluate and debug our generations.
-- [04 WIP]() We are planning to add a CPU-based LLM finetuning notebook with a small LLM finetuned for generating names
+- [04 WIP](04_train_llm.ipynb) Finetuning an LLM on a character-based dataset to create hero descriptions!
 
-The W&B dashboard: https://wandb.ai/capecape/dlai_diffusion
\ No newline at end of file
+The W&B dashboard: https://wandb.ai/deeplearning-ai-temp
\ No newline at end of file

From 0eab3807e76c18c9242dcdff06e1e500f82ea407 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 13:57:52 +0000
Subject: [PATCH 13/43] trained model

---
 dlai/data/weights/context_model.pth | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 dlai/data/weights/context_model.pth

diff --git a/dlai/data/weights/context_model.pth b/dlai/data/weights/context_model.pth
new file mode 100644
index 00000000..451319cf
--- /dev/null
+++ b/dlai/data/weights/context_model.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51535239b6f3e953db8ff9631278c3e6b133a5a500780bda5092db620ca8f570
+size 5989463

From 0c2cf01eed73ee93ae162a164ca7bbf98cba29f5 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 13:58:13 +0000
Subject: [PATCH 14/43] HF stack

---
 dlai/requirements.txt | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/dlai/requirements.txt b/dlai/requirements.txt
index 886e2b28..f8014cce 100644
--- a/dlai/requirements.txt
+++ b/dlai/requirements.txt
@@ -4,4 +4,10 @@ matplotlib
 pandas
 numpy
 wandb
-tqdm
\ No newline at end of file
+tqdm
+openai
+tenacity
+rich
+transformers
+datasets
+accelerate
\ No newline at end of file

From 8666f90b9ddd45e6e53d8ab44dba6b1b50a7dfc1 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 13:58:39 +0000
Subject: [PATCH 15/43] latest on reg

---
 dlai/02_diffusion_sampling.ipynb | 70 ++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 31 deletions(-)

diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
index 96e13a72..26a12fc3 100644
--- a/dlai/02_diffusion_sampling.ipynb
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "id": "54c3a942",
    "metadata": {
     "tags": []
@@ -60,7 +60,7 @@
    "source": [
     "# Wandb Params\n",
     "PROJECT = \"sprite_diffusion\"\n",
-    "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:v0\" \n",
+    "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:latest\" \n",
     "\n",
     "# ddpm sampler hyperparameters\n",
     "timesteps = 500\n",
@@ -95,7 +95,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "id": "8ab66255",
    "metadata": {},
    "outputs": [],
@@ -129,7 +129,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "b47633e2",
    "metadata": {},
    "outputs": [
@@ -165,7 +165,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "f6f479d1",
    "metadata": {},
    "outputs": [],
@@ -179,7 +179,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "8b0f5bed",
    "metadata": {},
    "outputs": [],
@@ -205,7 +205,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "id": "16085a65",
    "metadata": {},
    "outputs": [],
@@ -242,7 +242,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "id": "d88afdba",
    "metadata": {
     "tags": []
@@ -273,7 +273,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc",
    "metadata": {},
    "outputs": [],
@@ -292,7 +292,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
    "id": "5fdfc048-47f0-43b5-983e-da715e1ed562",
    "metadata": {
     "tags": []
@@ -330,7 +330,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
    "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2",
    "metadata": {},
    "outputs": [],
@@ -349,7 +349,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
    "id": "89e24210-4885-4559-92e1-db10566ef5ea",
    "metadata": {},
    "outputs": [
@@ -376,7 +376,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 15,
    "id": "25b07c26-0ac2-428a-8351-34f8b7228074",
    "metadata": {},
    "outputs": [
@@ -394,7 +394,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 16,
    "id": "af33d3c4",
    "metadata": {},
    "outputs": [],
@@ -424,7 +424,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 17,
    "id": "481afea1-ae53-4b5b-a3db-1d49be0733a3",
    "metadata": {},
    "outputs": [],
@@ -442,12 +442,12 @@
    "id": "987cee86-2db1-4a2a-9d14-f70c6248ecb9",
    "metadata": {},
    "source": [
-    "we log the table to W&B, we can also use `wandb.init` as a context manager, this way we ensure that the run is finished when exiting the manager"
+    "we log the table to W&B, we can also use `wandb.init` as a context manager, this way we ensure that the run is finished when exiting the manager."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 18,
    "id": "bbc7a2ca-ae05-4462-9ae3-82eb1a6dbc27",
    "metadata": {},
    "outputs": [
@@ -474,7 +474,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_123733-eztcx1w7</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_135129-c1jaiuwv</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -486,7 +486,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7' target=\"_blank\">fresh-frost-7</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv' target=\"_blank\">rare-thunder-9</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -510,7 +510,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -534,7 +534,7 @@
     {
      "data": {
       "text/html": [
-       " View run <strong style=\"color:#cdcd00\">fresh-frost-7</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7</a><br/>Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 2 other file(s)"
+       " View run <strong style=\"color:#cdcd00\">rare-thunder-9</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv</a><br/>Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 1 other file(s)"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -546,7 +546,7 @@
     {
      "data": {
       "text/html": [
-       "Find logs at: <code>./wandb/run-20230706_123733-eztcx1w7/logs</code>"
+       "Find logs at: <code>./wandb/run-20230706_135129-c1jaiuwv/logs</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -572,7 +572,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 19,
    "id": "68987e12-22d1-4c40-b0d2-b33f6397c34a",
    "metadata": {},
    "outputs": [
@@ -602,7 +602,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 20,
    "id": "626ef616-dae4-4417-9219-d67ef0794e63",
    "metadata": {},
    "outputs": [],
@@ -617,7 +617,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 21,
    "id": "dbeb2d18-feb9-4452-b368-3f7f03b1715d",
    "metadata": {},
    "outputs": [],
@@ -628,7 +628,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 22,
    "id": "8e196f2b-4d13-4cc2-a380-2f23530bee1c",
    "metadata": {
     "scrolled": true
@@ -649,7 +649,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_123747-yk6jzo2x</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_135310-u90wajwk</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -661,7 +661,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x' target=\"_blank\">misty-field-8</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk' target=\"_blank\">elated-eon-10</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -685,7 +685,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -709,7 +709,7 @@
     {
      "data": {
       "text/html": [
-       " View run <strong style=\"color:#cdcd00\">misty-field-8</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x</a><br/>Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 2 other file(s)"
+       " View run <strong style=\"color:#cdcd00\">elated-eon-10</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk</a><br/>Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -721,7 +721,7 @@
     {
      "data": {
       "text/html": [
-       "Find logs at: <code>./wandb/run-20230706_123747-yk6jzo2x/logs</code>"
+       "Find logs at: <code>./wandb/run-20230706_135310-u90wajwk/logs</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -735,6 +735,14 @@
     "with wandb.init(project=PROJECT, job_type=\"sampling_mix\", config=config):\n",
     "    wandb.log({\"hero_table\":hero_table})"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a21faa2f-f43a-40c3-9041-7d07d73a358e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From 5071595a2cf94e37a16e1f1552478764962cac3f Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Thu, 6 Jul 2023 13:58:57 +0000
Subject: [PATCH 16/43] pathlib

---
 dlai/01_diffusion_training.ipynb | 537 ++-----------------------------
 1 file changed, 25 insertions(+), 512 deletions(-)

diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index 9d3845e8..c7d5bf20 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -23,6 +23,7 @@
    },
    "outputs": [],
    "source": [
+    "from pathlib import Path\n",
     "from tqdm.notebook import tqdm\n",
     "import torch\n",
     "import torch.nn.functional as F\n",
@@ -55,7 +56,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "id": "54c3a942",
+   "id": "3d76c167-7122-4f88-9c9f-5ded96684fa5",
    "metadata": {
     "tags": []
    },
@@ -74,10 +75,10 @@
     "n_feat = 64 # 64 hidden dimension feature\n",
     "n_cfeat = 5 # context vector is of size 5\n",
     "height = 16 # 16x16 image\n",
-    "data_dir = './data/'\n",
-    "save_dir = './data/weights/'\n",
-    "if not os.path.exists(save_dir):\n",
-    "    os.mkdir(save_dir)\n",
+    "data_dir = Path('./data/')\n",
+    "save_dir = Path('./data/weights/')\n",
+    "save_dir.mkdir(exist_ok=True, parents=True)\n",
+    "\n",
     "\n",
     "# training hyperparameters\n",
     "batch_size = 100\n",
@@ -156,7 +157,7 @@
    ],
    "source": [
     "# load dataset and construct optimizer\n",
-    "dataset = CustomDataset.from_np(data_dir + \"sprites_1788_16x16.npy\", data_dir + \"sprite_labels_nc_1788_16x16.npy\")\n",
+    "dataset = CustomDataset.from_np(data_dir/\"sprites_1788_16x16.npy\", data_dir/\"sprite_labels_nc_1788_16x16.npy\")\n",
     "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=1)\n",
     "optim = torch.optim.Adam(nn_model.parameters(), lr=lrate, eps=1e-5)"
    ]
@@ -278,7 +279,7 @@
    "source": [
     "The following training cell takes very long to run on CPU, we have already trained the model for you on a GPU equipped machine.\n",
     "\n",
-    "### You can visit the result of this [training here](https://wandb.ai)"
+    "### You can visit the result of this [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua)"
    ]
   },
   {
@@ -310,7 +311,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_120421-lqf74fua</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_123702-2bkmjqyt</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -322,7 +323,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua' target=\"_blank\">daily-frost-1</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/2bkmjqyt' target=\"_blank\">lemon-galaxy-6</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -346,7 +347,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/2bkmjqyt' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/2bkmjqyt</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -358,7 +359,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6aef0bb639f74bdf97d56b0c0cd1ffc5",
+       "model_id": "7369da274a8a448e8b4d47071261a2f1",
        "version_major": 2,
        "version_minor": 0
       },
@@ -372,161 +373,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
+       "model_id": "04dab6db5d12408dbe95aa9cf3475833",
        "version_major": 2,
        "version_minor": 0
       },
@@ -538,351 +385,17 @@
      "output_type": "display_data"
     },
     {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c2bd6d2f891d4c69a4c285591b2bc1e4",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "VBox(children=(Label(value='51.649 MB of 51.655 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=0.9998…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<style>\n",
-       "    table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
-       "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
-       "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
-       "    </style>\n",
-       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>epoch</td><td>▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████</td></tr><tr><td>loss</td><td>█▆▅▅▄▃▃▃▃▃▄▃▃▃▃▃▃▂▂▂▃▂▂▂▂▂▂▂▂▃▃▂▂▁▁▁▂▂▁▁</td></tr><tr><td>lr</td><td>████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>epoch</td><td>31</td></tr><tr><td>loss</td><td>0.09235</td></tr><tr><td>lr</td><td>3e-05</td></tr></table><br/></div></div>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run <strong style=\"color:#cdcd00\">daily-frost-1</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua</a><br/>Synced 6 W&B file(s), 288 media file(s), 9 artifact file(s) and 1 other file(s)"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Find logs at: <code>./wandb/run-20230706_120421-lqf74fua/logs</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[11], line 22\u001b[0m\n\u001b[1;32m     20\u001b[0m pred_noise \u001b[38;5;241m=\u001b[39m nn_model(x_pert, t \u001b[38;5;241m/\u001b[39m timesteps, c\u001b[38;5;241m=\u001b[39mc)      \n\u001b[1;32m     21\u001b[0m loss \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39mmse_loss(pred_noise, noise)\n\u001b[0;32m---> 22\u001b[0m \u001b[43mloss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m    \n\u001b[1;32m     23\u001b[0m optim\u001b[38;5;241m.\u001b[39mstep()\n\u001b[1;32m     25\u001b[0m \u001b[38;5;66;03m# we log the relevant metrics to the workspace\u001b[39;00m\n",
+      "File \u001b[0;32m~/mambaforge/envs/pt2/lib/python3.11/site-packages/torch/_tensor.py:487\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m    477\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m    478\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m    479\u001b[0m         Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[1;32m    480\u001b[0m         (\u001b[38;5;28mself\u001b[39m,),\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    485\u001b[0m         inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m    486\u001b[0m     )\n\u001b[0;32m--> 487\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    488\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[1;32m    489\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/mambaforge/envs/pt2/lib/python3.11/site-packages/torch/autograd/__init__.py:200\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m    195\u001b[0m     retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[1;32m    197\u001b[0m \u001b[38;5;66;03m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m    198\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m    199\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 200\u001b[0m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m    201\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    202\u001b[0m \u001b[43m    \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
     }
    ],
    "source": [
@@ -918,7 +431,7 @@
     "    # save model periodically\n",
     "    if ep%4==0 or ep == int(n_epoch-1):\n",
     "        nn_model.eval()\n",
-    "        ckpt_file = save_dir + f\"context_model.pth\"\n",
+    "        ckpt_file = save_dir/f\"context_model.pth\"\n",
     "        torch.save(nn_model.state_dict(), ckpt_file)\n",
     "\n",
     "        # save model to wandb as an Artifact\n",

From 1051a159ab398f4f4cb60776b8dd249308c57811 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Fri, 7 Jul 2023 08:32:12 +0000
Subject: [PATCH 17/43] checkpoints

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index f1658105..76e5303f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,4 @@ __pycache__/
 *$py.class
 
 .idea/
+/**/Tiny*

From bfb5d630395731eb52f2cc8e36cfd0d340861eed Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Fri, 7 Jul 2023 09:23:54 +0000
Subject: [PATCH 18/43] split lines, move dataloader out

---
 dlai/00_intro.ipynb | 205 +++++++++++++++++++++++++++++---------------
 dlai/utilities.py   |  19 +++-
 2 files changed, 154 insertions(+), 70 deletions(-)

diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb
index d858d6ce..0244b6fb 100644
--- a/dlai/00_intro.ipynb
+++ b/dlai/00_intro.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 8,
    "id": "e9ba792c-2baa-4c19-a132-2ed82a759e79",
    "metadata": {},
    "outputs": [],
@@ -17,14 +17,13 @@
     "import torch.nn as nn\n",
     "import torch.nn.functional as F\n",
     "from torch.optim import Adam\n",
-    "from torch.utils.data import DataLoader, Subset\n",
     "\n",
-    "from utilities import *\n"
+    "from utilities import *"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 9,
    "id": "803c37e2-7ff5-46a6-afb7-b80cb69f7501",
    "metadata": {},
    "outputs": [],
@@ -32,9 +31,17 @@
     "# wandb.login() # uncomment if you want to login to wandb"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "8e4aad93-5819-4304-afb5-d962ee3f5fed",
+   "metadata": {},
+   "source": [
+    "We will be running the notebook in `anonymous` mode"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 10,
    "id": "d51a9f7f",
    "metadata": {},
    "outputs": [],
@@ -44,27 +51,11 @@
     "HIDDEN_SIZE = 256\n",
     "NUM_WORKERS = 2\n",
     "CLASSES = [\"hero\", \"non-hero\", \"food\", \"spell\", \"side-facing\"]\n",
-    "\n",
-    "# Device\n",
-    "device = torch.device(\"cuda\" if torch.cuda.is_available()  else \"cpu\")\n",
-    "\n",
-    "data_dir = Path('./data/')\n",
-    "\n",
-    "def get_dataloaders(batch_size, slice_size=None, valid_pct=0.2):\n",
-    "    dataset = CustomDataset.from_np(data_dir/\"sprites_1788_16x16.npy\", \n",
-    "                                    data_dir/\"sprite_labels_nc_1788_16x16.npy\", argmax=True)\n",
-    "\n",
-    "    if slice_size:\n",
-    "        dataset = dataset.subset(slice_size)\n",
-    "\n",
-    "    train_ds, valid_ds = dataset.split(valid_pct)\n",
-    "\n",
-    "    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=1)    \n",
-    "    valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False, num_workers=1)\n",
-    "\n",
-    "    return train_dl, valid_dl\n",
+    "DATA_DIR = Path('./data/')\n",
+    "DEVICE = torch.device(\"cuda\" if torch.cuda.is_available()  else \"cpu\")\n",
     "\n",
     "def get_model(dropout):\n",
+    "    \"Simple MLP with Dropout\"\n",
     "    return nn.Sequential(\n",
     "        nn.Flatten(),\n",
     "        nn.Linear(INPUT_SIZE, HIDDEN_SIZE),\n",
@@ -72,12 +63,12 @@
     "        nn.ReLU(),\n",
     "        nn.Dropout(dropout),\n",
     "        nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE)\n",
-    "    ).to(device)\n"
+    "    ).to(DEVICE)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 11,
    "id": "8401cf96",
    "metadata": {},
    "outputs": [],
@@ -90,7 +81,7 @@
     "\n",
     "    with torch.inference_mode():\n",
     "        for i, (images, labels) in enumerate(valid_dl):\n",
-    "            images, labels = images.to(device), labels.to(device)\n",
+    "            images, labels = images.to(DEVICE), labels.to(DEVICE)\n",
     "\n",
     "            # Forward pass\n",
     "            outputs = model(images)\n",
@@ -109,17 +100,18 @@
     "\n",
     "def log_image_predictions_table(images, predicted, labels, probs):\n",
     "    \"Create a wandb Table to log images, labels, and predictions\"\n",
-    "    table = wandb.Table(columns=[\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)])\n",
+    "    columns = [\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)]\n",
+    "    table = wandb.Table(columns=columns)\n",
     "    \n",
     "    for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n",
     "        table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n",
     "    \n",
-    "    wandb.log({\"predictions_table\": table}, commit=False)\n"
+    "    wandb.log({\"predictions_table\": table}, commit=False)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 12,
    "id": "e5492ebb-2dfa-44ce-af6c-24655e45a2ed",
    "metadata": {},
    "outputs": [],
@@ -133,7 +125,10 @@
     "    )\n",
     "\n",
     "    # Get the data\n",
-    "    train_dl, valid_dl = get_dataloaders(config.batch_size, config.slice_size, config.valid_pct)\n",
+    "    train_dl, valid_dl = get_dataloaders(DATA_DIR, \n",
+    "                                         config.batch_size, \n",
+    "                                         config.slice_size, \n",
+    "                                         config.valid_pct)\n",
     "    n_steps_per_epoch = math.ceil(len(train_dl.dataset) / config.batch_size)\n",
     "\n",
     "    # A simple MLP model\n",
@@ -149,7 +144,7 @@
     "        model.train()\n",
     "\n",
     "        for step, (images, labels) in enumerate(train_dl):\n",
-    "            images, labels = images.to(device), labels.to(device)\n",
+    "            images, labels = images.to(DEVICE), labels.to(DEVICE)\n",
     "\n",
     "            outputs = model(images)\n",
     "            train_loss = loss_func(outputs, labels)\n",
@@ -160,15 +155,14 @@
     "            example_ct += len(images)\n",
     "            metrics = {\n",
     "                \"train/train_loss\": train_loss,\n",
-    "                \"train/epoch\": (step + 1 + (n_steps_per_epoch * epoch)) / n_steps_per_epoch,\n",
+    "                \"train/epoch\": (step + 1 + (n_steps_per_epoch * epoch))/n_steps_per_epoch,\n",
     "                \"train/example_ct\": example_ct\n",
     "            }\n",
+    "            wandb.log(metrics)\n",
     "\n",
-    "            if step + 1 < n_steps_per_epoch:\n",
-    "                # Log train metrics to wandb \n",
-    "                wandb.log(metrics)\n",
-    "                \n",
-    "        val_loss, accuracy = validate_model(model, valid_dl, loss_func, log_images=(epoch == (config.epochs - 1)))\n",
+    "        # compute validation metrics, log images on last epoch\n",
+    "        val_loss, accuracy = validate_model(model, valid_dl, loss_func, \n",
+    "                                            log_images=(epoch == (config.epochs - 1)))\n",
     "\n",
     "        # Log train and validation metrics to wandb\n",
     "        val_metrics = {\n",
@@ -177,15 +171,12 @@
     "        }\n",
     "        wandb.log({**metrics, **val_metrics})\n",
     "\n",
-    "    # If you had a test set, this is how you could log it as a Summary metric\n",
-    "    wandb.run.summary['test_accuracy'] = 0.8\n",
-    "\n",
-    "    wandb.finish()\n"
+    "    wandb.finish()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 13,
    "id": "f33f739c-d7ef-4954-ae87-d5bdd6bf25ee",
    "metadata": {},
    "outputs": [],
@@ -202,17 +193,97 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 14,
    "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6",
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
-     ]
+     "data": {
+      "text/html": [
+       "Finishing last run (ID:lskq5lst) before initializing another..."
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c174ee7503f14722bee9a4d0db66a179",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(Label(value='0.062 MB of 0.062 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run <strong style=\"color:#cdcd00\">robust-salad-10</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/lskq5lst' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/lskq5lst</a><br/>Synced 7 W&B file(s), 0 media file(s), 0 artifact file(s) and 2 other file(s)"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Find logs at: <code>./wandb/run-20230707_091843-lskq5lst/logs</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Successfully finished last run (ID:lskq5lst). Initializing new run:<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d2250f5729b647df90ebc0b6830c5d55",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670081783331625, max=1.0…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
     },
     {
      "data": {
@@ -229,7 +300,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_123525-7s54fntl</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230707_091936-7ibfofq5</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -241,7 +312,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl' target=\"_blank\">clear-sound-7</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5' target=\"_blank\">ruby-voice-11</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -265,7 +336,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -287,7 +358,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e77b2cb6e7094dec858054a0ef68c5d3",
+       "model_id": "02a9448faca44d53b9ae82b2db4ba30d",
        "version_major": 2,
        "version_minor": 0
       },
@@ -318,7 +389,7 @@
        "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
        "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
        "    </style>\n",
-       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███</td></tr><tr><td>train/example_ct</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███</td></tr><tr><td>train/train_loss</td><td>█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁▅█</td></tr><tr><td>val/val_loss</td><td>█▃▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>24000</td></tr><tr><td>train/train_loss</td><td>0.0144</td></tr><tr><td>val/val_accuracy</td><td>1.0</td></tr><tr><td>val/val_loss</td><td>0.00669</td></tr></table><br/></div></div>"
+       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███</td></tr><tr><td>train/example_ct</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███</td></tr><tr><td>train/train_loss</td><td>█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁██</td></tr><tr><td>val/val_loss</td><td>█▂▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>24000</td></tr><tr><td>train/train_loss</td><td>0.00699</td></tr><tr><td>val/val_accuracy</td><td>1.0</td></tr><tr><td>val/val_loss</td><td>0.00692</td></tr></table><br/></div></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -330,7 +401,7 @@
     {
      "data": {
       "text/html": [
-       " View run <strong style=\"color:#cdcd00\">clear-sound-7</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl</a><br/>Synced 7 W&B file(s), 1 media file(s), 127 artifact file(s) and 2 other file(s)"
+       " View run <strong style=\"color:#cdcd00\">ruby-voice-11</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5</a><br/>Synced 7 W&B file(s), 1 media file(s), 122 artifact file(s) and 1 other file(s)"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -342,7 +413,7 @@
     {
      "data": {
       "text/html": [
-       "Find logs at: <code>./wandb/run-20230706_123525-7s54fntl/logs</code>"
+       "Find logs at: <code>./wandb/run-20230707_091936-7ibfofq5/logs</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -367,19 +438,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 15,
    "id": "4f40520a-66f8-4415-9e36-174dda06aca0",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "8acd0e8533e84ae88b608e748a904422",
+       "model_id": "264ff7030aa5490daaab7d7f573f268d",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670229483376413, max=1.0…"
+       "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670244300000074, max=1.0…"
       ]
      },
      "metadata": {},
@@ -400,7 +471,7 @@
     {
      "data": {
       "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_123542-lbawks79</code>"
+       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230707_092004-5pt6qfs9</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -412,7 +483,7 @@
     {
      "data": {
       "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79' target=\"_blank\">fearless-bird-8</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9' target=\"_blank\">zesty-oath-12</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -436,7 +507,7 @@
     {
      "data": {
       "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79</a>"
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9</a>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -458,7 +529,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e8b441fe3ba44768b4b2215b68a437b6",
+       "model_id": "56d2c401c89f4eb797175dff9ddbf7d0",
        "version_major": 2,
        "version_minor": 0
       },
@@ -489,7 +560,7 @@
        "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
        "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
        "    </style>\n",
-       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███</td></tr><tr><td>train/example_ct</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███</td></tr><tr><td>train/train_loss</td><td>█▅▃▃▃▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁██</td></tr><tr><td>val/val_loss</td><td>█▂▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>24000</td></tr><tr><td>train/train_loss</td><td>0.02836</td></tr><tr><td>val/val_accuracy</td><td>1.0</td></tr><tr><td>val/val_loss</td><td>0.00944</td></tr></table><br/></div></div>"
+       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███</td></tr><tr><td>train/example_ct</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███</td></tr><tr><td>train/train_loss</td><td>█▅▃▂▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁▆█</td></tr><tr><td>val/val_loss</td><td>█▃▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>24000</td></tr><tr><td>train/train_loss</td><td>0.01679</td></tr><tr><td>val/val_accuracy</td><td>1.0</td></tr><tr><td>val/val_loss</td><td>0.00977</td></tr></table><br/></div></div>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -501,7 +572,7 @@
     {
      "data": {
       "text/html": [
-       " View run <strong style=\"color:#cdcd00\">fearless-bird-8</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79</a><br/>Synced 7 W&B file(s), 1 media file(s), 124 artifact file(s) and 2 other file(s)"
+       " View run <strong style=\"color:#cdcd00\">zesty-oath-12</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9</a><br/>Synced 7 W&B file(s), 1 media file(s), 124 artifact file(s) and 2 other file(s)"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
@@ -513,7 +584,7 @@
     {
      "data": {
       "text/html": [
-       "Find logs at: <code>./wandb/run-20230706_123542-lbawks79/logs</code>"
+       "Find logs at: <code>./wandb/run-20230707_092004-5pt6qfs9/logs</code>"
       ],
       "text/plain": [
        "<IPython.core.display.HTML object>"
diff --git a/dlai/utilities.py b/dlai/utilities.py
index 29ce5bc9..561ee270 100644
--- a/dlai/utilities.py
+++ b/dlai/utilities.py
@@ -1,5 +1,6 @@
 import os
 import random
+from pathlib import Path
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -8,7 +9,7 @@
 import torchvision.transforms as transforms
 from matplotlib.animation import FuncAnimation, PillowWriter
 from PIL import Image
-from torch.utils.data import Dataset
+from torch.utils.data import Dataset, DataLoader
 from torchvision.utils import make_grid, save_image
 
 
@@ -334,8 +335,6 @@ def __init__(self, sprites, slabels, transform=default_tfms, null_context=False,
             self.slabels = np.argmax(slabels, axis=1)
         else:
             self.slabels = slabels
-        print(f"sprite shape: {self.sprites.shape}")
-        print(f"labels shape: {self.slabels.shape}")
         self.transform = transform
         self.null_context = null_context
 
@@ -373,4 +372,18 @@ def split(self, pct=0.2):
         train_dataset, test_dataset = torch.utils.data.random_split(self, [train_size, test_size])
         return train_dataset, test_dataset
 
+def get_dataloaders(data_dir, batch_size, slice_size=None, valid_pct=0.2):
+    "Load the sprites .npy files under `data_dir` (labels argmax-ed), optionally subset to `slice_size`, split off `valid_pct` for validation, and return `(train_dl, valid_dl)`; only the train loader shuffles"
+    dataset = CustomDataset.from_np(Path(data_dir)/"sprites_1788_16x16.npy", 
+                                    Path(data_dir)/"sprite_labels_nc_1788_16x16.npy",
+                                    argmax=True)
 
+    if slice_size:
+        dataset = dataset.subset(slice_size)
+
+    train_ds, valid_ds = dataset.split(valid_pct)
+
+    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=1)    
+    valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False, num_workers=1)
+
+    return train_dl, valid_dl

From bc665272da43e5e92c40034bc2f325657521e922 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Fri, 7 Jul 2023 09:37:17 +0000
Subject: [PATCH 19/43] SimpleNamespace isort

---
 dlai/01_diffusion_training.ipynb | 20 +++++++++++++++-----
 dlai/02_diffusion_sampling.ipynb |  4 +++-
 dlai/utilities.py                |  2 +-
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index c7d5bf20..6b038bb3 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -23,6 +23,7 @@
    },
    "outputs": [],
    "source": [
+    "from types import SimpleNamespace\n",
     "from pathlib import Path\n",
     "from tqdm.notebook import tqdm\n",
     "import torch\n",
@@ -63,7 +64,7 @@
    "outputs": [],
    "source": [
     "# hyperparameters\n",
-    "num_samples = 32\n",
+    "num_samples = 30\n",
     "\n",
     "# diffusion hyperparameters\n",
     "timesteps = 500\n",
@@ -86,7 +87,7 @@
     "lrate=1e-3\n",
     "\n",
     "# we are storing the parameters in a dictionary to be logged to wandb\n",
-    "config = dict(\n",
+    "config = SimpleNamespace(\n",
     "    num_samples=num_samples,\n",
     "    timesteps=timesteps,\n",
     "    beta1=beta1,\n",
@@ -269,17 +270,23 @@
     "noises = torch.randn(num_samples, 3, height, height).to(device)  \n",
     "\n",
     "# A fixed context vector to sample from\n",
-    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4]), 5).to(device=device).float()"
+    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n",
+    "                                     1,1,1,1,1,1,\n",
+    "                                     2,2,2,2,2,2,\n",
+    "                                     3,3,3,3,3,3,\n",
+    "                                     4,4,4,4,4,4]), \n",
+    "                       5).to(device=device).float()"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "26765a7e-4ddc-449e-95c3-54c58a564738",
    "metadata": {},
    "source": [
     "The following training cell takes very long to run on CPU, we have already trained the model for you on a GPU equipped machine.\n",
     "\n",
-    "### You can visit the result of this [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua)"
+    "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua) <<"
    ]
   },
   {
@@ -401,7 +408,10 @@
    "source": [
     "# same code as before, added comments on the extra W&B instrumentation lines\n",
     "# create a wandb run\n",
-    "run = wandb.init(project=\"sprite_diffusion\", job_type=\"train\", anonymous=\"allow\", config=config)\n",
+    "run = wandb.init(project=\"sprite_diffusion\", \n",
+    "                 job_type=\"train\", \n",
+    "                 anonymous=\"allow\", \n",
+    "                 config=config)\n",
     "\n",
     "for ep in tqdm(range(n_epoch), leave=True, total=n_epoch):\n",
     "    # set into train mode\n",
diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
index 26a12fc3..45ed8425 100644
--- a/dlai/02_diffusion_sampling.ipynb
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -21,6 +21,7 @@
    },
    "outputs": [],
    "source": [
+    "from types import SimpleNamespace\n",
     "from pathlib import Path\n",
     "import torch\n",
     "import torch.nn.functional as F\n",
@@ -73,7 +74,7 @@
     "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n",
     "\n",
     "# we are storing the parameters in a dictionary to be logged to wandb\n",
-    "config = dict(\n",
+    "config = SimpleNamespace(\n",
     "    timesteps=timesteps,\n",
     "    beta1=beta1,\n",
     "    beta2=beta2,\n",
@@ -194,6 +195,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "895b2bbc-46cd-4be8-a672-e84ef69bc329",
    "metadata": {},
diff --git a/dlai/utilities.py b/dlai/utilities.py
index 561ee270..a57955f4 100644
--- a/dlai/utilities.py
+++ b/dlai/utilities.py
@@ -9,7 +9,7 @@
 import torchvision.transforms as transforms
 from matplotlib.animation import FuncAnimation, PillowWriter
 from PIL import Image
-from torch.utils.data import Dataset, DataLoader
+from torch.utils.data import DataLoader, Dataset
 from torchvision.utils import make_grid, save_image
 
 

From aa65e53df91388d274d422d05800a2a88518e87e Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Fri, 7 Jul 2023 09:37:31 +0000
Subject: [PATCH 20/43] delete old checkpoint

---
 dlai/data/weights/context_model_trained.pth | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 dlai/data/weights/context_model_trained.pth

diff --git a/dlai/data/weights/context_model_trained.pth b/dlai/data/weights/context_model_trained.pth
deleted file mode 100644
index 451319cf..00000000
--- a/dlai/data/weights/context_model_trained.pth
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:51535239b6f3e953db8ff9631278c3e6b133a5a500780bda5092db620ca8f570
-size 5989463

From d202631d5ecd4237a3e8d145134e3260228174e2 Mon Sep 17 00:00:00 2001
From: kldarek <darek.kleczek@gmail.com>
Date: Tue, 11 Jul 2023 12:38:18 +0200
Subject: [PATCH 21/43] intro nb

---
 dlai/00_intro.ipynb | 554 +++++++++-----------------------------------
 1 file changed, 115 insertions(+), 439 deletions(-)

diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb
index 0244b6fb..ac282235 100644
--- a/dlai/00_intro.ipynb
+++ b/dlai/00_intro.ipynb
@@ -1,8 +1,19 @@
 {
  "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "dbfb9335",
+   "metadata": {},
+   "source": [
+    "# Introduction to W&B\n",
+    "\n",
+    "We will add `wandb` to the training of a sprite classification model, so that we can track and visualize important metrics, gain insights into our model's behavior and make informed decisions for model improvements. We will also see how to compare and analyze different experiments, collaborate with team members, and reproduce results effectively."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "e9ba792c-2baa-4c19-a132-2ed82a759e79",
    "metadata": {},
    "outputs": [],
@@ -21,9 +32,19 @@
     "from utilities import *"
    ]
   },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "c4cac7d2",
+   "metadata": {},
+   "source": [
+    "### W&B account\n",
+    "[Sign up](https://wandb.ai/site) for a free account at https://wandb.ai/site and then log in to your wandb account to store the results of your experiments and use advanced W&B features. You can also continue to learn in anonymous mode. "
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "803c37e2-7ff5-46a6-afb7-b80cb69f7501",
    "metadata": {},
    "outputs": [],
@@ -32,16 +53,19 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
-   "id": "8e4aad93-5819-4304-afb5-d962ee3f5fed",
+   "id": "2e0bfcc9",
    "metadata": {},
    "source": [
-    "We will be running the notebook on `anonymous` mode"
+    "### Sprite classification\n",
+    "\n",
+    "We will build a simple model to classify sprites. "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "id": "d51a9f7f",
    "metadata": {},
    "outputs": [],
@@ -68,50 +92,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
-   "id": "8401cf96",
+   "execution_count": null,
+   "id": "f33f739c-d7ef-4954-ae87-d5bdd6bf25ee",
    "metadata": {},
    "outputs": [],
    "source": [
-    "def validate_model(model, valid_dl, loss_func, log_images=False, batch_idx=0):\n",
-    "    \"Compute the performance of the model on the validation dataset and log a wandb.Table\"\n",
-    "    model.eval()\n",
-    "    val_loss = 0.0\n",
-    "    correct = 0\n",
-    "\n",
-    "    with torch.inference_mode():\n",
-    "        for i, (images, labels) in enumerate(valid_dl):\n",
-    "            images, labels = images.to(DEVICE), labels.to(DEVICE)\n",
-    "\n",
-    "            # Forward pass\n",
-    "            outputs = model(images)\n",
-    "            val_loss += loss_func(outputs, labels) * labels.size(0)\n",
-    "\n",
-    "            # Compute accuracy and accumulate\n",
-    "            _, predicted = torch.max(outputs.data, 1)\n",
-    "            correct += (predicted == labels).sum().item()\n",
-    "\n",
-    "            # Log one batch of images to the dashboard, always same batch_idx.\n",
-    "            if i == batch_idx and log_images:\n",
-    "                log_image_predictions_table(images, predicted, labels, outputs.softmax(dim=1))\n",
-    "\n",
-    "    return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n",
-    "\n",
-    "\n",
-    "def log_image_predictions_table(images, predicted, labels, probs):\n",
-    "    \"Create a wandb Table to log images, labels, and predictions\"\n",
-    "    columns = [\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)]\n",
-    "    table = wandb.Table(columns=columns)\n",
-    "    \n",
-    "    for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n",
-    "        table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n",
-    "    \n",
-    "    wandb.log({\"predictions_table\": table}, commit=False)"
+    "# Let's define a config object to store our hyperparameters\n",
+    "config = SimpleNamespace(\n",
+    "    epochs = 2,\n",
+    "    batch_size = 128,\n",
+    "    lr = 1e-5,\n",
+    "    dropout = 0.5,\n",
+    "    slice_size = 10_000,\n",
+    "    valid_pct = 0.2,\n",
+    ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "id": "e5492ebb-2dfa-44ce-af6c-24655e45a2ed",
    "metadata": {},
    "outputs": [],
@@ -119,7 +118,7 @@
     "def train_model(config):\n",
     "    \"Train a model with a given config\"\n",
     "    wandb.init(\n",
-    "        project=\"intro\",\n",
+    "        project=\"dlai-intro\",\n",
     "        config=config,\n",
     "        anonymous=\"allow\",\n",
     "    )\n",
@@ -176,253 +175,63 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
-   "id": "f33f739c-d7ef-4954-ae87-d5bdd6bf25ee",
+   "execution_count": null,
+   "id": "8401cf96",
    "metadata": {},
    "outputs": [],
    "source": [
-    "config = SimpleNamespace(\n",
-    "    epochs = 3,\n",
-    "    batch_size = 128,\n",
-    "    lr = 1e-3,\n",
-    "    dropout = 0.1,\n",
-    "    slice_size = 10_000,\n",
-    "    valid_pct = 0.2,\n",
-    ")"
+    "def validate_model(model, valid_dl, loss_func, log_images=False, batch_idx=0):\n",
+    "    \"Compute the performance of the model on the validation dataset and log a wandb.Table\"\n",
+    "    model.eval()\n",
+    "    val_loss = 0.0\n",
+    "    correct = 0\n",
+    "\n",
+    "    with torch.inference_mode():\n",
+    "        for i, (images, labels) in enumerate(valid_dl):\n",
+    "            images, labels = images.to(DEVICE), labels.to(DEVICE)\n",
+    "\n",
+    "            # Forward pass\n",
+    "            outputs = model(images)\n",
+    "            val_loss += loss_func(outputs, labels) * labels.size(0)\n",
+    "\n",
+    "            # Compute accuracy and accumulate\n",
+    "            _, predicted = torch.max(outputs.data, 1)\n",
+    "            correct += (predicted == labels).sum().item()\n",
+    "\n",
+    "            # Log one batch of images to the dashboard, always same batch_idx.\n",
+    "            if i == batch_idx and log_images:\n",
+    "                log_image_predictions_table(images, predicted, labels, outputs.softmax(dim=1))\n",
+    "\n",
+    "    return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n",
+    "\n",
+    "\n",
+    "def log_image_predictions_table(images, predicted, labels, probs):\n",
+    "    \"Create a wandb Table to log images, labels, and predictions\"\n",
+    "    columns = [\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)]\n",
+    "    table = wandb.Table(columns=columns)\n",
+    "    \n",
+    "    for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n",
+    "        table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n",
+    "    \n",
+    "    wandb.log({\"predictions_table\": table}, commit=False)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "b3df2485",
+   "metadata": {},
+   "source": [
+    "### Train model\n",
+    "Let's train the model with the default config and check how it's doing in W&B. "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "Finishing last run (ID:lskq5lst) before initializing another..."
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c174ee7503f14722bee9a4d0db66a179",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "VBox(children=(Label(value='0.062 MB of 0.062 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run <strong style=\"color:#cdcd00\">robust-salad-10</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/lskq5lst' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/lskq5lst</a><br/>Synced 7 W&B file(s), 0 media file(s), 0 artifact file(s) and 2 other file(s)"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Find logs at: <code>./wandb/run-20230707_091843-lskq5lst/logs</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Successfully finished last run (ID:lskq5lst). Initializing new run:<br/>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "d2250f5729b647df90ebc0b6830c5d55",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670081783331625, max=1.0…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Tracking run with wandb version 0.15.5"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230707_091936-7ibfofq5</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5' target=\"_blank\">ruby-voice-11</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "sprite shape: (89400, 16, 16, 3)\n",
-      "labels shape: (89400,)\n",
-      "sprite shape: (10000, 16, 16, 3)\n",
-      "labels shape: (10000,)\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "02a9448faca44d53b9ae82b2db4ba30d",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/3 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<style>\n",
-       "    table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
-       "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
-       "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
-       "    </style>\n",
-       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███</td></tr><tr><td>train/example_ct</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███</td></tr><tr><td>train/train_loss</td><td>█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁██</td></tr><tr><td>val/val_loss</td><td>█▂▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>24000</td></tr><tr><td>train/train_loss</td><td>0.00699</td></tr><tr><td>val/val_accuracy</td><td>1.0</td></tr><tr><td>val/val_loss</td><td>0.00692</td></tr></table><br/></div></div>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run <strong style=\"color:#cdcd00\">ruby-voice-11</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5</a><br/>Synced 7 W&B file(s), 1 media file(s), 122 artifact file(s) and 1 other file(s)"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Find logs at: <code>./wandb/run-20230707_091936-7ibfofq5/logs</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "train_model(config)"
    ]
@@ -433,176 +242,43 @@
    "id": "e9ecf01d",
    "metadata": {},
    "source": [
-    "Let's try with another value of dropout:"
+    "Let's try other values of the learning rate:"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "id": "4f40520a-66f8-4415-9e36-174dda06aca0",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "264ff7030aa5490daaab7d7f573f268d",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670244300000074, max=1.0…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Tracking run with wandb version 0.15.5"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230707_092004-5pt6qfs9</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9' target=\"_blank\">zesty-oath-12</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/intro' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "sprite shape: (89400, 16, 16, 3)\n",
-      "labels shape: (89400,)\n",
-      "sprite shape: (10000, 16, 16, 3)\n",
-      "labels shape: (10000,)\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "56d2c401c89f4eb797175dff9ddbf7d0",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/3 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<style>\n",
-       "    table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
-       "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
-       "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
-       "    </style>\n",
-       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>train/epoch</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███</td></tr><tr><td>train/example_ct</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███</td></tr><tr><td>train/train_loss</td><td>█▅▃▂▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>val/val_accuracy</td><td>▁▆█</td></tr><tr><td>val/val_loss</td><td>█▃▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>test_accuracy</td><td>0.8</td></tr><tr><td>train/epoch</td><td>3.0</td></tr><tr><td>train/example_ct</td><td>24000</td></tr><tr><td>train/train_loss</td><td>0.01679</td></tr><tr><td>val/val_accuracy</td><td>1.0</td></tr><tr><td>val/val_loss</td><td>0.00977</td></tr></table><br/></div></div>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run <strong style=\"color:#cdcd00\">zesty-oath-12</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9</a><br/>Synced 7 W&B file(s), 1 media file(s), 124 artifact file(s) and 2 other file(s)"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Find logs at: <code>./wandb/run-20230707_092004-5pt6qfs9/logs</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "config.dropout = 0.5\n",
+    "config.lr = 1e-4\n",
     "train_model(config)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "09008d54",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config.lr = 1e-3\n",
+    "train_model(config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d83ea0a",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -616,7 +292,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.8.13"
   }
  },
  "nbformat": 4,

From d46b3fab85a358eba43f9e790334d35e0e331114 Mon Sep 17 00:00:00 2001
From: kldarek <darek.kleczek@gmail.com>
Date: Tue, 11 Jul 2023 18:22:38 +0200
Subject: [PATCH 22/43] nb 03

---
 dlai/03_llm_eval.ipynb | 139 ++++++++++++++++++++++++++++++++---------
 1 file changed, 109 insertions(+), 30 deletions(-)

diff --git a/dlai/03_llm_eval.ipynb b/dlai/03_llm_eval.ipynb
index 797527c1..c0a4aa7c 100644
--- a/dlai/03_llm_eval.ipynb
+++ b/dlai/03_llm_eval.ipynb
@@ -13,15 +13,16 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "53c0d4d6-3d2b-45e5-90fa-ba7953496ec2",
    "metadata": {},
    "source": [
-    "# LLM Tracing with W&B\n",
+    "# LLM Evaluation and Tracing with W&B\n",
     "\n",
-    "## 1. Auto-logging\n",
+    "## 1. Using Tables for Evaluation\n",
     "\n",
-    "In this section, we will call OpenAI LLM to generate names of our game assets. We will use W&B autologging, also available for other popular LLMs and libraries such as Cohere or HuggingFace Pipelines. "
+    "In this section, we will call OpenAI LLM to generate names of our game assets. We will use W&B Tables to evaluate the generations. "
    ]
   },
   {
@@ -38,15 +39,12 @@
     "\n",
     "import openai\n",
     "\n",
-    "from rich.markdown import Markdown\n",
-    "import pandas as pd\n",
     "from tenacity import (\n",
     "    retry,\n",
     "    stop_after_attempt,\n",
     "    wait_random_exponential, # for exponential backoff\n",
     ")  \n",
     "import wandb\n",
-    "from wandb.integration.openai import autolog\n",
     "from wandb_addons.prompts import Trace"
    ]
   },
@@ -57,39 +55,50 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "PROJECT = \"deeplearningai-llm\""
+    "PROJECT = \"dlai-llm\"\n",
+    "MODEL_NAME = \"gpt-3.5-turbo\""
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b",
+   "id": "bb575380",
    "metadata": {},
    "outputs": [],
    "source": [
-    "autolog({\"project\":PROJECT, \"job_type\": \"generation\", \"anonymous\":\"allow\"})"
+    "# wandb.login() # uncomment if you want to log in to wandb"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b2ab394b-295b-4cfa-aade-aa274003a56a",
+   "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b",
    "metadata": {},
    "outputs": [],
    "source": [
-    "@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))\n",
-    "def completion_with_backoff(**kwargs):\n",
-    "    return openai.ChatCompletion.create(**kwargs)"
+    "run = wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "4e7bcf11",
+   "metadata": {},
+   "source": [
+    "### Simple generations\n",
+    "Let's start by generating names for our game assets using OpenAI `ChatCompletion`, and saving the resulting generations in W&B Tables. "
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "076e62a1-188f-47e1-bda9-5e3619e7d4ba",
+   "id": "b2ab394b-295b-4cfa-aade-aa274003a56a",
    "metadata": {},
    "outputs": [],
    "source": [
-    "MODEL_NAME = \"gpt-3.5-turbo\""
+    "@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))\n",
+    "def completion_with_backoff(**kwargs):\n",
+    "    return openai.ChatCompletion.create(**kwargs)"
    ]
   },
   {
@@ -99,19 +108,31 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def generate_and_print(system_prompt, user_prompt, n=5):\n",
+    "def generate_and_print(system_prompt, user_prompt, table, n=5):\n",
     "    messages=[\n",
     "            {\"role\": \"system\", \"content\": system_prompt},\n",
     "            {\"role\": \"user\", \"content\": user_prompt},\n",
     "        ]\n",
+    "    start_time = time.time()\n",
     "    responses = completion_with_backoff(\n",
     "        model=MODEL_NAME,\n",
     "        messages=messages,\n",
     "        n = n,\n",
     "        )\n",
+    "    elapsed_time = time.time() - start_time\n",
     "    for response in responses.choices:\n",
     "        generation = response.message.content\n",
-    "        display(Markdown(generation))"
+    "        print(generation)\n",
+    "    table.add_data(system_prompt,\n",
+    "                user_prompt,\n",
+    "                [response.message.content for response in responses.choices],\n",
+    "                elapsed_time,\n",
+    "                datetime.datetime.fromtimestamp(responses.created),\n",
+    "                responses.model,\n",
+    "                responses.usage.prompt_tokens,\n",
+    "                responses.usage.completion_tokens,\n",
+    "                responses.usage.total_tokens\n",
+    "                )"
    ]
   },
   {
@@ -123,9 +144,32 @@
    "source": [
     "system_prompt = \"\"\"You are a creative copywriter.\n",
     "You're given a category of game asset, and your goal is to design a name of that asset.\n",
-    "The game is set in a fantasy world where everyone laughs and respects each other, while celebrating diversity.\"\"\"\n",
+    "The game is set in a fantasy world where everyone laughs and respects each other, \n",
+    "while celebrating diversity.\"\"\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "395880fa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define W&B Table to store generations\n",
+    "columns = [\"system_prompt\", \"user_prompt\", \"generations\", \"elapsed_time\", \"timestamp\",\\\n",
+    "            \"model\", \"prompt_tokens\", \"completion_tokens\", \"total_tokens\"]\n",
+    "table = wandb.Table(columns=columns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6fb07587",
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "user_prompt = \"hero\"\n",
-    "generate_and_print(system_prompt, user_prompt)"
+    "generate_and_print(system_prompt, user_prompt, table)"
    ]
   },
   {
@@ -136,7 +180,7 @@
    "outputs": [],
    "source": [
     "user_prompt = \"jewel\"\n",
-    "generate_and_print(system_prompt, user_prompt)"
+    "generate_and_print(system_prompt, user_prompt, table)"
    ]
   },
   {
@@ -146,17 +190,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "wandb.finish()"
+    "wandb.log({\"simple_generations\": table})\n",
+    "run.finish()"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "16d6d513-389d-4c67-a942-a922bce6ff1a",
    "metadata": {},
    "source": [
     "## 2. Using Tracer to log more complex chains\n",
     "\n",
-    "How can we get more creative outputs? Let's design an LLM chain that will first randomly pick a fantasy world, and then generate character names. We will demonstrate how to use Tracer in such scenario."
+    "How can we get more creative outputs? Let's design an LLM chain that will first randomly pick a fantasy world, and then generate character names. We will demonstrate how to use Tracer in such a scenario. We will log the inputs and outputs, start and end times, whether the OpenAI call was successful, the token usage, and additional metadata."
    ]
   },
   {
@@ -184,7 +230,8 @@
     "model_name = \"gpt-3.5-turbo\"\n",
     "temperature = 0.7\n",
     "system_message = \"\"\"You are a creative copywriter. \n",
-    "You're given a category of game asset, a fantasy world, and your goal is to design a name of that asset.\n",
+    "You're given a category of game asset and a fantasy world.\n",
+    "Your goal is to design a name of that asset.\n",
     "Provide the resulting name only, no additional description.\n",
     "Single name, max 3 words output, remember!\"\"\""
    ]
@@ -204,7 +251,9 @@
     "          name=\"MyCreativeChain\",\n",
     "          kind=\"chain\",\n",
     "          start_time_ms=start_time_ms,\n",
-    "          metadata={\"user\": \"student_1\"})\n",
+    "          metadata={\"user\": \"student_1\"},\n",
+    "          model_dict={\"_kind\": \"CreativeChain\"}\n",
+    "          )\n",
     "\n",
     "    # part 2 - your chain picks a fantasy world\n",
     "    time.sleep(3)\n",
@@ -220,7 +269,9 @@
     "          start_time_ms=start_time_ms,\n",
     "          end_time_ms=tool_end_time_ms,\n",
     "          inputs={\"input\": query},\n",
-    "          outputs={\"result\": expanded_prompt})\n",
+    "          outputs={\"result\": expanded_prompt},\n",
+    "          model_dict={\"_kind\": \"tool\", \"num_worlds\": len(worlds)}\n",
+    "          )\n",
     "\n",
     "    # add the TOOL span as a child of the root\n",
     "    root_span.add_child(tool_span)\n",
@@ -233,6 +284,7 @@
     "\n",
     "    response = openai.ChatCompletion.create(model=model_name,\n",
     "                                            messages=messages,\n",
+    "                                            max_tokens=12,\n",
     "                                            temperature=temperature)   \n",
     "\n",
     "    llm_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)\n",
@@ -250,6 +302,7 @@
     "          end_time_ms=llm_end_time_ms,\n",
     "          inputs={\"system_prompt\":system_message, \"query\":expanded_prompt},\n",
     "          outputs={\"response\": response_text},\n",
+    "          model_dict={\"_kind\": \"Openai\", \"engine\": response[\"model\"], \"model\": response[\"object\"]}\n",
     "          )\n",
     "\n",
     "    # add the LLM span as a child of the Chain span...\n",
@@ -275,8 +328,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")\n",
-    "\n",
+    "# Let's start a new wandb run\n",
+    "wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7409a004",
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "run_creative_chain(\"hero\")"
    ]
   },
@@ -301,6 +363,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "1ccc075f-32bf-4451-b7ad-ab2a49cc86b6",
    "metadata": {},
@@ -377,7 +440,7 @@
     "        self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None\n",
     "    ) -> str:\n",
     "        \"\"\"Use the tool asynchronously.\"\"\"\n",
-    "        raise NotImplementedError(\"custom_search does not support async\")\n",
+    "        raise NotImplementedError(\"pick_world does not support async\")\n",
     "        \n",
     "class NameValidatorTool(BaseTool):\n",
     "    name = \"validate_name\"\n",
@@ -397,7 +460,7 @@
     "        self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None\n",
     "    ) -> str:\n",
     "        \"\"\"Use the tool asynchronously.\"\"\"\n",
-    "        raise NotImplementedError(\"custom_search does not support async\")"
+    "        raise NotImplementedError(\"validate_name does not support async\")"
    ]
   },
   {
@@ -407,7 +470,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "llm = ChatOpenAI(temperature=0)"
+    "llm = ChatOpenAI(temperature=0.7)"
    ]
   },
   {
@@ -476,6 +539,22 @@
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "643f6295",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93462bd0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From 63a90742f41eb9a0a4e9a71502142ac00d0836ea Mon Sep 17 00:00:00 2001
From: kldarek <darek.kleczek@gmail.com>
Date: Tue, 11 Jul 2023 22:14:01 +0000
Subject: [PATCH 23/43] nb 04

---
 dlai/04_train_llm.ipynb | 581 ++++++++--------------------------------
 1 file changed, 105 insertions(+), 476 deletions(-)

diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb
index 3fbef4e5..ccb7b550 100644
--- a/dlai/04_train_llm.ipynb
+++ b/dlai/04_train_llm.ipynb
@@ -1,18 +1,17 @@
 {
  "cells": [
   {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "fe0ed738-872b-4952-baf8-b3f7c92214a0",
+   "cell_type": "markdown",
+   "id": "1dfae479-9399-492d-acaa-d9751615ee86",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# !pip install transformers accelerate dataset"
+    "# Finetuning a language model\n",
+    "Let's see how to finetune a language model to generate character backstories using HuggingFace Trainer with wandb integration. We'll use a tiny language model (`TinyStories-33M`) due to resource constraints, but the lessons you learn here should be applicable to large models too!"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "f64e095c-d28b-44aa-a122-4121c5c66a5b",
    "metadata": {},
    "outputs": [],
@@ -22,531 +21,173 @@
     "from transformers import AutoTokenizer\n",
     "from datasets import load_dataset\n",
     "from transformers import AutoModelForCausalLM\n",
-    "from transformers import Trainer, TrainingArguments"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "3fd80268-c4a1-4e1a-aed3-cd5c3ab4d48f",
-   "metadata": {},
-   "source": [
-    "Load a dataset from Huggingface"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "a7535b8b-d220-44e8-a56c-97e250c36596",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Found cached dataset parquet (/home/tcapelle/.cache/huggingface/datasets/MohamedRashad___parquet/MohamedRashad--characters_backstories-6398ba4bb1a6e421/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "899f1c4acc1a40d19459e9323bc75960",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/1 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "ds = load_dataset('MohamedRashad/characters_backstories')"
+    "from transformers import Trainer, TrainingArguments\n",
+    "\n",
+    "import wandb"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "78879ff2-7eca-4b57-83f8-00b203f9e65d",
+   "execution_count": null,
+   "id": "2286ae41-213d-480d-a4ba-8c4e2e1c4771",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "DatasetDict({\n",
-       "    train: Dataset({\n",
-       "        features: ['text', 'target'],\n",
-       "        num_rows: 2322\n",
-       "    })\n",
-       "})"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "ds"
+    "model_checkpoint = \"roneneldan/TinyStories-33M\""
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "6a58c980-50ce-4d57-8d58-13a4bbda7249",
+   "id": "3fd80268-c4a1-4e1a-aed3-cd5c3ab4d48f",
    "metadata": {},
    "source": [
-    "As this dataset has no validation split, we will create one:"
+    "### Preparing data\n",
+    "\n",
+    "We'll start by loading a dataset containing Dungeons and Dragons character biographies from Huggingface. "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "7dae9106-8015-43da-a6d9-1124dee4bdde",
+   "execution_count": null,
+   "id": "a7535b8b-d220-44e8-a56c-97e250c36596",
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds = ds[\"train\"].train_test_split(test_size=0.2)"
+    "ds = load_dataset('MohamedRashad/characters_backstories')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "id": "2286ae41-213d-480d-a4ba-8c4e2e1c4771",
+   "execution_count": null,
+   "id": "13caeb7f-8a07-4ca2-a770-5b627238c2ac",
    "metadata": {},
    "outputs": [],
    "source": [
-    "model_checkpoint = \"roneneldan/TinyStories-33M\"  # distilgpt2"
+    "# Let's take a look at one example\n",
+    "ds[\"train\"][400]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "id": "26dfa0b7-8d9f-44f3-9e09-bc12bcb5ae0b",
+   "execution_count": null,
+   "id": "7dae9106-8015-43da-a6d9-1124dee4bdde",
    "metadata": {},
    "outputs": [],
    "source": [
-    "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f4f7a79d-9519-4133-a8cd-0a2bc59ee97b",
-   "metadata": {},
-   "source": [
-    "We can now call the tokenizer on all our texts. This is very simple, using the map method from the Datasets library. First we define a function that call the tokenizer on our texts:"
+    "# As this dataset has no validation split, we will create one\n",
+    "ds = ds[\"train\"].train_test_split(test_size=0.2)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "id": "0ea05869-8ece-4a82-b9d4-3a62a84b6a77",
+   "execution_count": null,
+   "id": "7ea1602d-504b-43de-87ad-fcb35b9e61f7",
    "metadata": {},
    "outputs": [],
    "source": [
-    "def tokenize_function(examples):\n",
-    "    return tokenizer(examples[\"target\"])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "0413ebca-019b-49dc-b042-cf3cb20bf26c",
-   "metadata": {},
-   "source": [
-    "Then we apply it to all the splits in our `datasets` object, using `batched=True` and 4 processes to speed up the preprocessing. We won't need the `text` column afterward, so we discard it."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "2ce65c5f-8227-4c41-9e96-dfc80de611be",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'text': 'Generate Backstory based on following information\\nCharacter Name: Vaskir Tempus\\nCharacter Race: Yuan-ti abomination\\nCharacter Class: Paladin of treachery/ goo bladelock\\n\\nOutput:\\n',\n",
-       " 'target': 'Vaskir is an exiled yuan ti who forsook his religion to worship the great old one dendar. he is a master swordsman who wields a greatsword in tandem with a longsword, effectively dualwielding the huge blade. He is chaotic evil, believing that government and law holds back all of humanity from their goals, keeping them oppressed and subjugated under the foot of the highest ruler'}"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "ds[\"train\"][232]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "44c0f6c6-8b87-4dc7-b138-f3c9d6cac163",
-   "metadata": {},
-   "source": [
-    "we want to grab the characters backstories in the `target` column"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "22368c91-ddf8-4b08-848e-f732ff155494",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map (num_proc=4):   0%|          | 0/1857 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Token indices sequence length is longer than the specified maximum sequence length for this model (2264 > 2048). Running this sequence through the model will result in indexing errors\n",
-      "Token indices sequence length is longer than the specified maximum sequence length for this model (2812 > 2048). Running this sequence through the model will result in indexing errors\n",
-      "Token indices sequence length is longer than the specified maximum sequence length for this model (2573 > 2048). Running this sequence through the model will result in indexing errors\n",
-      "Token indices sequence length is longer than the specified maximum sequence length for this model (2952 > 2048). Running this sequence through the model will result in indexing errors\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map (num_proc=4):   0%|          | 0/465 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Token indices sequence length is longer than the specified maximum sequence length for this model (2661 > 2048). Running this sequence through the model will result in indexing errors\n",
-      "Token indices sequence length is longer than the specified maximum sequence length for this model (4725 > 2048). Running this sequence through the model will result in indexing errors\n",
-      "Token indices sequence length is longer than the specified maximum sequence length for this model (2464 > 2048). Running this sequence through the model will result in indexing errors\n",
-      "Token indices sequence length is longer than the specified maximum sequence length for this model (3121 > 2048). Running this sequence through the model will result in indexing errors\n"
-     ]
-    }
-   ],
-   "source": [
-    "tokenized_datasets = ds.map(tokenize_function, batched=True, num_proc=4, remove_columns=[\"text\", \"target\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "d59cc8a9-5f87-4eb7-abbc-f4fc18fea51d",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[13]"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenized_datasets[\"train\"][1][\"input_ids\"][0:10]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "70bc12ae-52dc-47ad-b9ef-1e5b8af829e8",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'.'"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.decode(tokenized_datasets[\"train\"][1][\"input_ids\"])"
+    "# We'll create a tokenizer from model checkpoint\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)\n",
+    "\n",
+    "# We'll need padding to have same length sequences in a batch\n",
+    "tokenizer.pad_token = tokenizer.eos_token\n",
+    "\n",
+    "# Define a tokenization function that first concatenates text and target\n",
+    "def tokenize_function(example):\n",
+    "    merged = example[\"text\"] + \" \" + example[\"target\"]\n",
+    "    batch = tokenizer(merged, padding='max_length', truncation=True, max_length=128)\n",
+    "    batch[\"labels\"] = batch[\"input_ids\"].copy()\n",
+    "    return batch\n",
+    "\n",
+    "# Apply it on our dataset, and remove the text columns\n",
+    "tokenized_datasets = ds.map(tokenize_function, remove_columns=[\"text\", \"target\"])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
-   "id": "85c6ad00-3825-4f12-be49-8ff336d5d398",
+   "execution_count": null,
+   "id": "a42417b8-ffa8-4d96-92ea-d8d949d87d5e",
    "metadata": {},
    "outputs": [],
    "source": [
-    "block_size = 256\n",
-    "\n",
-    "def group_texts(examples):\n",
-    "    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}\n",
-    "    total_length = len(concatenated_examples[list(examples.keys())[0]])\n",
-    "    total_length = (total_length // block_size) * block_size\n",
-    "    # Split by chunks of max_len.\n",
-    "    result = {\n",
-    "        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]\n",
-    "        for k, t in concatenated_examples.items()\n",
-    "    }\n",
-    "    result[\"labels\"] = result[\"input_ids\"].copy()\n",
-    "    return result"
+    "# Let's check out one prepared example\n",
+    "print(tokenizer.decode(tokenized_datasets[\"train\"][900]['input_ids']))"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "26a73750-5e38-4236-a5c3-b356d8041dc3",
-   "metadata": {},
-   "source": [
-    "First note that we duplicate the inputs for our labels. This is because the model of the 🤗 Transformers library apply the shifting to the right, so we don't need to do it manually.\n",
-    "\n",
-    "Also note that by default, the `map` method will send a batch of 1,000 examples to be treated by the preprocessing function. So here, we will drop the remainder to make the concatenated tokenized texts a multiple of `block_size` every 1,000 examples. You can adjust this behavior by passing a higher batch size (which will also be processed slower). You can also speed-up the preprocessing by using multiprocessing:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "572f29c8-84d3-45b9-b8df-26de8c22bc91",
+   "id": "2e8d6b17-a63d-41f1-92cf-416064b52156",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map (num_proc=4):   0%|          | 0/1857 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Map (num_proc=4):   0%|          | 0/465 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
    "source": [
-    "lm_datasets = tokenized_datasets.map(\n",
-    "    group_texts,\n",
-    "    batched=True,\n",
-    "    batch_size=1000,\n",
-    "    num_proc=4,\n",
-    ")"
+    "### Training\n",
+    "Let's finetune a pretrained language model on our dataset using HF Transformers and their wandb integration. "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "id": "b4f131eb-979e-40f6-9e28-19756beaa8e4",
    "metadata": {},
    "outputs": [],
    "source": [
-    "model = AutoModelForCausalLM.from_pretrained(model_checkpoint)"
+    "# We will train a causal (autoregressive) language model from a pretrained checkpoint\n",
+    "model = AutoModelForCausalLM.from_pretrained(model_checkpoint);"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "id": "7345ab23-8d12-4d4c-a39d-bb2202bff218",
    "metadata": {},
    "outputs": [],
    "source": [
-    "os.environ[\"WANDB_PROJECT\"] = \"tiny-stories-characters\""
+    "# Start a new wandb run\n",
+    "run = wandb.init(project='dlai-lm-tuning')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "id": "d74ee155-3c30-4ef2-9c4d-fd8ee222c50c",
    "metadata": {},
    "outputs": [],
    "source": [
+    "# Define training arguments\n",
     "model_name = model_checkpoint.split(\"/\")[-1]\n",
     "training_args = TrainingArguments(\n",
     "    f\"{model_name}-finetuned-characters-backstories\",\n",
-    "    report_to=\"wandb\",\n",
+    "    report_to=\"wandb\", # we need one line to track experiments in wandb\n",
+    "    num_train_epochs=3,\n",
     "    logging_steps=1,\n",
     "    evaluation_strategy = \"epoch\",\n",
-    "    learning_rate=2e-5,\n",
+    "    learning_rate=1e-4,\n",
     "    weight_decay=0.01,\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "id": "af62105f-a478-436f-88a2-5c1d78b9d20a",
    "metadata": {},
    "outputs": [],
    "source": [
+    "# We'll use HF Trainer\n",
     "trainer = Trainer(\n",
     "    model=model,\n",
     "    args=training_args,\n",
-    "    train_dataset=lm_datasets[\"train\"],\n",
-    "    eval_dataset=lm_datasets[\"test\"],\n",
+    "    train_dataset=tokenized_datasets[\"train\"],\n",
+    "    eval_dataset=tokenized_datasets[\"test\"],\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "id": "01958a56-c22a-4a27-bc71-41c59fc97f05",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "Tracking run with wandb version 0.15.5"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_130647-l0pa7ivo</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters/runs/l0pa7ivo' target=\"_blank\">dulcet-cherry-3</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters/runs/l0pa7ivo' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters/runs/l0pa7ivo</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "    <div>\n",
-       "      \n",
-       "      <progress value='816' max='816' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-       "      [816/816 01:26, Epoch 3/3]\n",
-       "    </div>\n",
-       "    <table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       " <tr style=\"text-align: left;\">\n",
-       "      <th>Epoch</th>\n",
-       "      <th>Training Loss</th>\n",
-       "      <th>Validation Loss</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <td>1</td>\n",
-       "      <td>5.795800</td>\n",
-       "      <td>5.142995</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>2</td>\n",
-       "      <td>4.887100</td>\n",
-       "      <td>5.009582</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>3</td>\n",
-       "      <td>4.886900</td>\n",
-       "      <td>4.998519</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table><p>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "TrainOutput(global_step=816, training_loss=5.0707503650702686, metrics={'train_runtime': 92.5014, 'train_samples_per_second': 70.572, 'train_steps_per_second': 8.821, 'total_flos': 284203589566464.0, 'train_loss': 5.0707503650702686, 'epoch': 3.0})"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
+    "# Let's train!\n",
     "trainer.train()"
    ]
   },
@@ -555,12 +196,13 @@
    "id": "0a247e01-35d0-454f-8b7e-5f24cdf66f33",
    "metadata": {},
    "source": [
-    "## Generate"
+    "### Generate\n",
+    "Let's use our trained model to generate some texts with our provided prompts and save them in a W&B Table. The model is tiny; replace it with a bigger one to get better results!"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "id": "6f16d43d-445f-4df5-8734-85584f95792f",
    "metadata": {},
    "outputs": [],
@@ -571,71 +213,58 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": null,
    "id": "7911e43f-f4ce-4855-9f68-662438af8d24",
    "metadata": {},
    "outputs": [],
    "source": [
-    "prompt = \"The hero was half human and cat, his strenghts were\"\n",
+    "prompts = [\n",
+    "    \"Generate Backstory based on following information Character Name: Frogger Character Race: Aarakocra Character Class: Ranger Output: \",\n",
+    "    \"Generate Backstory based on following information Character Name: Smarty Character Race: Aasimar Character Class: Cleric Output: \",\n",
+    "    \"Generate Backstory based on following information Character Name: Volcano Character Race: Android Character Class: Paladin Output: \",\n",
+    "]\n",
+    "\n",
+    "table = wandb.Table(columns=[\"prompt\", \"generation\"])\n",
     "\n",
-    "input_ids = tokenizer.encode(prompt, return_tensors=\"pt\").to(device)"
+    "for prompt in prompts:\n",
+    "    input_ids = tokenizer.encode(prompt, return_tensors=\"pt\").to(device)\n",
+    "    output = model.generate(input_ids, do_sample=True, max_new_tokens=50, top_p=0.3)\n",
+    "    output_text = tokenizer.decode(output[0], skip_special_tokens=True)\n",
+    "    table.add_data(prompt, output_text)\n",
+    "    \n",
+    "wandb.log({'tiny_generations': table})"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
-   "id": "844802b9-0ffc-466e-bedb-d7b7c6f337de",
+   "execution_count": null,
+   "id": "3083c6a3-fdb8-44ab-a028-c0a222a2fdef",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[  464,  4293,   373,  2063,  1692,   290,  3797,    11,   465, 43071,\n",
-       "           456,   912,   547]], device='cuda:0')"
-      ]
-     },
-     "execution_count": 28,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "input_ids"
+    "wandb.finish()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
-   "id": "e0883650-ab62-49c9-88d8-7f8c4fdfb0a9",
+   "execution_count": null,
+   "id": "120d5e88-2460-4716-bcba-077ff4630772",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
-      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The hero was half human and cat, his strenghts were the first to be. He was the only one who had a lot of power, and he was the only one who had a lot of power. He was a great wizard, and he was the only one who could do it. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great\n"
-     ]
-    }
-   ],
-   "source": [
-    "output = model.generate(input_ids, max_length = 128, num_beams=1)\n",
-    "output_text = tokenizer.decode(output[0], skip_special_tokens=True)\n",
-    "\n",
-    "print(output_text)"
-   ]
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "976edc1f-418d-47a6-88e2-ca37e3b25366",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -649,7 +278,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.8.0"
   }
  },
  "nbformat": 4,

From 353adc0c5af4eebd43ea3c9fac4f44f3b507e864 Mon Sep 17 00:00:00 2001
From: kldarek <darek.kleczek@gmail.com>
Date: Tue, 11 Jul 2023 22:16:31 +0000
Subject: [PATCH 24/43] nb 04 anonymous

---
 dlai/04_train_llm.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb
index ccb7b550..fcf20b6f 100644
--- a/dlai/04_train_llm.ipynb
+++ b/dlai/04_train_llm.ipynb
@@ -141,7 +141,7 @@
    "outputs": [],
    "source": [
     "# Start a new wandb run\n",
-    "run = wandb.init(project='dlai-lm-tuning')"
+    "run = wandb.init(project='dlai-lm-tuning', job_type=\"training\", anonymous=\"allow\")"
    ]
   },
   {

From 909510d999981a96ea818efa86c26495b87758ab Mon Sep 17 00:00:00 2001
From: kldarek <darek.kleczek@gmail.com>
Date: Thu, 13 Jul 2023 16:21:17 +0000
Subject: [PATCH 25/43] revision;

---
 dlai/00_intro.ipynb              |  83 ++++---
 dlai/01_diffusion_training.ipynb | 159 ++-----------
 dlai/02_diffusion_sampling.ipynb | 368 +++----------------------------
 dlai/03_llm_eval.ipynb           |   6 +-
 dlai/04_train_llm.ipynb          |   4 +-
 dlai/requirements.txt            | 211 ++++++++++++++++--
 6 files changed, 282 insertions(+), 549 deletions(-)

diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb
index ac282235..54b61396 100644
--- a/dlai/00_intro.ipynb
+++ b/dlai/00_intro.ipynb
@@ -1,7 +1,6 @@
 {
  "cells": [
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "dbfb9335",
    "metadata": {},
@@ -21,8 +20,8 @@
     "import math\n",
     "from pathlib import Path\n",
     "from types import SimpleNamespace\n",
-    "\n",
     "import wandb\n",
+    "\n",
     "from tqdm.auto import tqdm\n",
     "import torch\n",
     "import torch.nn as nn\n",
@@ -33,27 +32,6 @@
    ]
   },
   {
-   "attachments": {},
-   "cell_type": "markdown",
-   "id": "c4cac7d2",
-   "metadata": {},
-   "source": [
-    "### W&B account\n",
-    "[Sign up](https://wandb.ai/site) for a free account at https://wandb.ai/site and then login to your wandb account to store the results of your experiments and use advanced W&B features. You can also continue to learn in anonymous mode. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "803c37e2-7ff5-46a6-afb7-b80cb69f7501",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# wandb.login() # uncomment if you want to login to wandb"
-   ]
-  },
-  {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "2e0bfcc9",
    "metadata": {},
@@ -117,12 +95,12 @@
    "source": [
     "def train_model(config):\n",
     "    \"Train a model with a given config\"\n",
+    "    # Start a wandb run\n",
     "    wandb.init(\n",
     "        project=\"dlai-intro\",\n",
     "        config=config,\n",
     "        anonymous=\"allow\",\n",
     "    )\n",
-    "\n",
     "    # Get the data\n",
     "    train_dl, valid_dl = get_dataloaders(DATA_DIR, \n",
     "                                         config.batch_size, \n",
@@ -157,9 +135,10 @@
     "                \"train/epoch\": (step + 1 + (n_steps_per_epoch * epoch))/n_steps_per_epoch,\n",
     "                \"train/example_ct\": example_ct\n",
     "            }\n",
+    "            # Log train metrics to wandb\n",
     "            wandb.log(metrics)\n",
-    "\n",
-    "        # compute validation metrics, log images on last epoch\n",
+    "            \n",
+    "        # Compute validation metrics, log images on last epoch\n",
     "        val_loss, accuracy = validate_model(model, valid_dl, loss_func, \n",
     "                                            log_images=(epoch == (config.epochs - 1)))\n",
     "\n",
@@ -168,7 +147,8 @@
     "            \"val/val_loss\": val_loss,\n",
     "            \"val/val_accuracy\": accuracy\n",
     "        }\n",
-    "        wandb.log({**metrics, **val_metrics})\n",
+    "        # Log validation metrics to wandb\n",
+    "        wandb.log(val_metrics)\n",
     "\n",
     "    wandb.finish()"
    ]
@@ -204,7 +184,6 @@
     "\n",
     "    return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n",
     "\n",
-    "\n",
     "def log_image_predictions_table(images, predicted, labels, probs):\n",
     "    \"Create a wandb Table to log images, labels, and predictions\"\n",
     "    columns = [\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)]\n",
@@ -213,57 +192,75 @@
     "    for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n",
     "        table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n",
     "    \n",
-    "    wandb.log({\"predictions_table\": table}, commit=False)"
+    "    wandb.log({\"predictions_table\": table}, commit=False)\n"
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
-   "id": "b3df2485",
+   "id": "c4cac7d2",
    "metadata": {},
    "source": [
-    "### Train model\n",
-    "Let's train the model with default config and check how it's doing in W&B. "
+    "### W&B account\n",
+    "[Sign up](https://wandb.ai/site) for a free account at https://wandb.ai/site and then log in to your W&B account to store the results of your experiments and use advanced W&B features. You can also continue to learn in anonymous mode. "
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6",
+   "id": "803c37e2-7ff5-46a6-afb7-b80cb69f7501",
    "metadata": {},
    "outputs": [],
    "source": [
-    "train_model(config)"
+    "# wandb.login() # uncomment if you want to login to wandb"
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
-   "id": "e9ecf01d",
+   "id": "b3df2485",
    "metadata": {},
    "source": [
-    "Let's try with other values of learning rate:"
+    "### Train model\n",
+    "Let's train the model with default config and check how it's doing in W&B. "
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4f40520a-66f8-4415-9e36-174dda06aca0",
+   "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6",
    "metadata": {},
    "outputs": [],
    "source": [
-    "config.lr = 1e-4\n",
     "train_model(config)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "09008d54",
+   "id": "6d8af6b3-fdec-4f46-90b4-28585257e9cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config.epochs = 3\n",
+    "train_model(config)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e9ecf01d",
+   "metadata": {},
+   "source": [
+    "Let's try other values of hyperparameters:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "593f7b8d-216c-4b51-a389-eaae195e5e74",
    "metadata": {},
    "outputs": [],
    "source": [
-    "config.lr = 1e-3\n",
+    "config.epochs = 1\n",
+    "config.lr = 1e-4\n",
     "train_model(config)"
    ]
   },
@@ -292,7 +289,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,
diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index 6b038bb3..d01015ed 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -1,7 +1,6 @@
 {
  "cells": [
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "958524a2-cb56-439e-850e-032dd10478f2",
    "metadata": {},
@@ -16,7 +15,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "700e687c",
    "metadata": {
     "tags": []
@@ -37,16 +36,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "b88f9513",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# wandb.login() # uncomment if you want to login to wandb"
+    "# wandb.login(relogin=True) # uncomment if you want to login to wandb"
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "7c0d229a",
    "metadata": {},
@@ -56,7 +54,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "3d76c167-7122-4f88-9c9f-5ded96684fa5",
    "metadata": {
     "tags": []
@@ -104,7 +102,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "bb43f98f",
    "metadata": {},
@@ -114,7 +111,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "a705d0a8",
    "metadata": {
     "tags": []
@@ -130,7 +127,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "6bc9001e",
    "metadata": {
     "tags": []
@@ -143,19 +140,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "76c63b85",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "sprite shape: (89400, 16, 16, 3)\n",
-      "labels shape: (89400, 5)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# load dataset and construct optimizer\n",
     "dataset = CustomDataset.from_np(data_dir/\"sprites_1788_16x16.npy\", data_dir/\"sprite_labels_nc_1788_16x16.npy\")\n",
@@ -165,7 +153,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "eb13689d",
    "metadata": {},
    "outputs": [],
@@ -176,7 +164,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "fe8eb277",
    "metadata": {},
@@ -185,7 +172,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "45d92c52-8a11-450c-bc78-ffa221af2fa3",
    "metadata": {},
@@ -195,7 +181,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "8b0f5bed",
    "metadata": {},
    "outputs": [],
@@ -211,7 +197,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "16085a65",
    "metadata": {},
    "outputs": [],
@@ -239,7 +225,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "d9ed46d7",
    "metadata": {},
@@ -248,7 +233,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c",
    "metadata": {},
@@ -258,7 +242,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "id": "d88afdba",
    "metadata": {
     "tags": []
@@ -279,7 +263,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "26765a7e-4ddc-449e-95c3-54c58a564738",
    "metadata": {},
@@ -291,120 +274,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "id": "f5f4af69",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "Tracking run with wandb version 0.15.5"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_123702-2bkmjqyt</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/2bkmjqyt' target=\"_blank\">lemon-galaxy-6</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/2bkmjqyt' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/2bkmjqyt</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "7369da274a8a448e8b4d47071261a2f1",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/32 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "04dab6db5d12408dbe95aa9cf3475833",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/894 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[11], line 22\u001b[0m\n\u001b[1;32m     20\u001b[0m pred_noise \u001b[38;5;241m=\u001b[39m nn_model(x_pert, t \u001b[38;5;241m/\u001b[39m timesteps, c\u001b[38;5;241m=\u001b[39mc)      \n\u001b[1;32m     21\u001b[0m loss \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39mmse_loss(pred_noise, noise)\n\u001b[0;32m---> 22\u001b[0m \u001b[43mloss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m    \n\u001b[1;32m     23\u001b[0m optim\u001b[38;5;241m.\u001b[39mstep()\n\u001b[1;32m     25\u001b[0m \u001b[38;5;66;03m# we log the relevant metrics to the workspace\u001b[39;00m\n",
-      "File \u001b[0;32m~/mambaforge/envs/pt2/lib/python3.11/site-packages/torch/_tensor.py:487\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m    477\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m    478\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m    479\u001b[0m         Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[1;32m    480\u001b[0m         (\u001b[38;5;28mself\u001b[39m,),\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    485\u001b[0m         inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m    486\u001b[0m     )\n\u001b[0;32m--> 487\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    488\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[1;32m    489\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/mambaforge/envs/pt2/lib/python3.11/site-packages/torch/autograd/__init__.py:200\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m    195\u001b[0m     retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[1;32m    197\u001b[0m \u001b[38;5;66;03m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m    198\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m    199\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 200\u001b[0m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m    201\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    202\u001b[0m \u001b[43m    \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# same code as before, added comments on the extra W&B instrumentation lines\n",
     "# create a wandb run\n",
@@ -470,7 +343,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -484,7 +357,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,
diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
index 45ed8425..2dd0c663 100644
--- a/dlai/02_diffusion_sampling.ipynb
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -1,7 +1,6 @@
 {
  "cells": [
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "958524a2-cb56-439e-850e-032dd10478f2",
    "metadata": {},
@@ -14,7 +13,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "700e687c",
    "metadata": {
     "tags": []
@@ -33,16 +32,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "id": "beab0dad-c211-4e3c-ab80-de52788f27e2",
+   "execution_count": null,
+   "id": "dcaf7a29-782c-4735-991f-4408f5ec6128",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# wandb.login() # uncomment if you want to login to wandb"
+    "# wandb.login(relogin=True) # uncomment if you want to login to wandb"
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "7c0d229a",
    "metadata": {},
@@ -52,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "54c3a942",
    "metadata": {
     "tags": []
@@ -86,7 +84,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "bb43f98f",
    "metadata": {},
@@ -96,7 +93,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "8ab66255",
    "metadata": {},
    "outputs": [],
@@ -130,24 +127,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "b47633e2",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m:   1 of 1 files downloaded.  \n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "nn_model = load_model(MODEL_ARTIFACT)"
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "fe8eb277",
    "metadata": {},
@@ -156,7 +144,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "45d92c52-8a11-450c-bc78-ffa221af2fa3",
    "metadata": {},
@@ -166,7 +153,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "f6f479d1",
    "metadata": {},
    "outputs": [],
@@ -180,7 +167,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "8b0f5bed",
    "metadata": {},
    "outputs": [],
@@ -195,7 +182,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "895b2bbc-46cd-4be8-a672-e84ef69bc329",
    "metadata": {},
@@ -207,7 +193,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "16085a65",
    "metadata": {},
    "outputs": [],
@@ -234,7 +220,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c",
    "metadata": {},
@@ -244,7 +229,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "id": "d88afdba",
    "metadata": {
     "tags": []
@@ -265,7 +250,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "1cbf9ef8-619a-4052-a138-a88c0f0f8b0b",
    "metadata": {},
@@ -275,7 +259,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc",
    "metadata": {},
    "outputs": [],
@@ -294,7 +278,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "id": "5fdfc048-47f0-43b5-983e-da715e1ed562",
    "metadata": {
     "tags": []
@@ -322,7 +306,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "aee10774-ff79-4df7-9b2d-1908561c23e5",
    "metadata": {},
@@ -332,7 +315,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2",
    "metadata": {},
    "outputs": [],
@@ -341,7 +324,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "90b838be-8fa1-4c12-9c4f-e40dfacc08e1",
    "metadata": {},
@@ -351,24 +333,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "id": "89e24210-4885-4559-92e1-db10566ef5ea",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "sampling timestep   1\r"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "ddpm_samples, _ = sample_ddpm_context(noises, ctx_vector)"
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "836584a1-26b5-45b1-98c9-0c45d639c8f9",
    "metadata": {},
@@ -378,25 +351,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "id": "25b07c26-0ac2-428a-8351-34f8b7228074",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "sampling timestep  20\r"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "ddim_samples, _ = sample_ddim_context(noises, ctx_vector, n=ddim_n)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "id": "af33d3c4",
    "metadata": {},
    "outputs": [],
@@ -407,7 +372,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "daea8275-0356-452e-a9f9-2824ef53f1ea",
    "metadata": {},
@@ -416,7 +380,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "85be303d-0f0b-4df4-8c87-bd1bfb6145a2",
    "metadata": {},
@@ -426,7 +389,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "id": "481afea1-ae53-4b5b-a3db-1d49be0733a3",
    "metadata": {},
    "outputs": [],
@@ -439,7 +402,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "987cee86-2db1-4a2a-9d14-f70c6248ecb9",
    "metadata": {},
@@ -449,295 +411,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "id": "bbc7a2ca-ae05-4462-9ae3-82eb1a6dbc27",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "Tracking run with wandb version 0.15.5"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_135129-c1jaiuwv</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv' target=\"_blank\">rare-thunder-9</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run <strong style=\"color:#cdcd00\">rare-thunder-9</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv</a><br/>Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 1 other file(s)"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Find logs at: <code>./wandb/run-20230706_135129-c1jaiuwv/logs</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "with wandb.init(project=PROJECT, job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n",
     "    wandb.log({\"samplers_table\":table})"
    ]
   },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "id": "a974258a-55fb-43ef-9136-985ec85bc3fc",
-   "metadata": {},
-   "source": [
-    "## Mixing classes during sampling"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "id": "68987e12-22d1-4c40-b0d2-b33f6397c34a",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "sampling timestep   1\r"
-     ]
-    }
-   ],
-   "source": [
-    "ctx = torch.tensor([\n",
-    "    # hero, non-hero, food, spell, side-facing\n",
-    "    [1,0,0,0,0],      #human\n",
-    "    [1,0,0.6,0,0],    \n",
-    "    [0,0,0.6,0.4,0],  \n",
-    "    [1,0,0,0,1],  \n",
-    "    [1,1,0,0,0],\n",
-    "    [1,0,0,1,0]\n",
-    "]).float().to(device)\n",
-    "\n",
-    "# let's pass the same noise everytime\n",
-    "samples = torch.cat([torch.randn(1, 3, height, height)]*6, axis=0).to(device)  \n",
-    "ddpm_samples, _ = sample_ddpm_context(samples, ctx)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "id": "626ef616-dae4-4417-9219-d67ef0794e63",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "hero_table = wandb.Table(columns=[\"generation\", \n",
-    "                                  \"hero\", \n",
-    "                                  \"non-hero\", \n",
-    "                                  \"food\", \n",
-    "                                  \"spell\", \n",
-    "                                  \"side-facing\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "id": "dbeb2d18-feb9-4452-b368-3f7f03b1715d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for s, c in zip(ddpm_samples, ctx.cpu().numpy().tolist()):\n",
-    "    hero_table.add_data(wandb.Image(s), *c)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "id": "8e196f2b-4d13-4cc2-a380-2f23530bee1c",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "Tracking run with wandb version 0.15.5"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Run data is saved locally in <code>/home/tcapelle/work/edu/dlai/wandb/run-20230706_135310-u90wajwk</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk' target=\"_blank\">elated-eon-10</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run <strong style=\"color:#cdcd00\">elated-eon-10</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk</a><br/>Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Find logs at: <code>./wandb/run-20230706_135310-u90wajwk/logs</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "with wandb.init(project=PROJECT, job_type=\"sampling_mix\", config=config):\n",
-    "    wandb.log({\"hero_table\":hero_table})"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -749,7 +431,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -763,7 +445,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,
diff --git a/dlai/03_llm_eval.ipynb b/dlai/03_llm_eval.ipynb
index c0a4aa7c..567bb33a 100644
--- a/dlai/03_llm_eval.ipynb
+++ b/dlai/03_llm_eval.ipynb
@@ -13,7 +13,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "53c0d4d6-3d2b-45e5-90fa-ba7953496ec2",
    "metadata": {},
@@ -80,7 +79,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "4e7bcf11",
    "metadata": {},
@@ -195,7 +193,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "16d6d513-389d-4c67-a942-a922bce6ff1a",
    "metadata": {},
@@ -363,7 +360,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "1ccc075f-32bf-4451-b7ad-ab2a49cc86b6",
    "metadata": {},
@@ -573,7 +569,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,
diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb
index fcf20b6f..5148cb2c 100644
--- a/dlai/04_train_llm.ipynb
+++ b/dlai/04_train_llm.ipynb
@@ -156,7 +156,7 @@
     "training_args = TrainingArguments(\n",
     "    f\"{model_name}-finetuned-characters-backstories\",\n",
     "    report_to=\"wandb\", # we need one line to track experiments in wandb\n",
-    "    num_train_epochs=3,\n",
+    "    num_train_epochs=1,\n",
     "    logging_steps=1,\n",
     "    evaluation_strategy = \"epoch\",\n",
     "    learning_rate=1e-4,\n",
@@ -278,7 +278,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.0"
+   "version": "3.9.7"
   }
  },
  "nbformat": 4,
diff --git a/dlai/requirements.txt b/dlai/requirements.txt
index f8014cce..96f321ce 100644
--- a/dlai/requirements.txt
+++ b/dlai/requirements.txt
@@ -1,13 +1,198 @@
-torch>=2.0
-torchvision>=0.15
-matplotlib
-pandas
-numpy
-wandb
-tqdm
-openai
-tenacity
-rich
-transformers
-datasets
-accelerate
\ No newline at end of file
+accelerate==0.21.0
+aiohttp==3.8.4
+aiosignal==1.3.1
+anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1688651106312/work/dist
+appdirs==1.4.4
+argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1640817743617/work
+argon2-cffi-bindings @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi-bindings_1649500321618/work
+asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1670263926556/work
+async-lru @ file:///home/conda/feedstock_root/build_artifacts/async-lru_1688997201545/work
+async-timeout==4.0.2
+attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1683424013410/work
+Babel @ file:///home/conda/feedstock_root/build_artifacts/babel_1677767029043/work
+backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
+backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1687772187254/work
+beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1680888073205/work
+bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1674535352125/work
+Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1648883617327/work
+build==0.10.0
+CacheControl==0.12.14
+certifi==2023.5.7
+cffi @ file:///tmp/abs_98z5h56wf8/croots/recipe/cffi_1659598650955/work
+charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1688813409104/work
+cleo==2.0.1
+click==8.1.5
+cmake==3.26.4
+contourpy==1.1.0
+crashtest==0.4.1
+cryptography==41.0.2
+cycler==0.11.0
+dataclasses-json==0.5.9
+datasets==2.13.1
+decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work
+defusedxml @ file:///home/conda/feedstock_root/build_artifacts/defusedxml_1615232257335/work
+dill==0.3.6
+distlib==0.3.6
+docker-pycreds==0.4.0
+dulwich==0.21.5
+entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work
+exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1688381075899/work
+executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1667317341051/work
+fastjsonschema @ file:///home/conda/feedstock_root/build_artifacts/python-fastjsonschema_1684761244589/work/dist
+filelock==3.12.2
+flit_core @ file:///home/conda/feedstock_root/build_artifacts/flit-core_1684084314667/work/source/flit_core
+fonttools==4.41.0
+frozenlist==1.4.0
+fsspec==2023.6.0
+gitdb==4.0.10
+GitPython==3.1.32
+greenlet==2.0.2
+html5lib==1.1
+huggingface-hub==0.16.4
+idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1663625384323/work
+importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1688754491823/work
+importlib-resources @ file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1689017639396/work
+installer==0.7.0
+ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1620912942381/work/dist/ipykernel-5.5.5-py3-none-any.whl
+ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1685727741709/work
+ipython-genutils==0.2.0
+ipywidgets==8.0.7
+jaraco.classes==3.3.0
+jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1669134318875/work
+jeepney==0.8.0
+Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1654302431367/work
+json5 @ file:///home/conda/feedstock_root/build_artifacts/json5_1688248289187/work
+jsonschema @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-meta_1669810440410/work
+jupyter-events @ file:///home/conda/feedstock_root/build_artifacts/jupyter_events_1673559782596/work
+jupyter-lsp @ file:///home/conda/feedstock_root/build_artifacts/jupyter-lsp-meta_1685453365113/work/jupyter-lsp
+jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1687700988094/work
+jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1686775603087/work
+jupyter_server @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_1687869799272/work
+jupyter_server_terminals @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_terminals_1673491454549/work
+jupyterlab @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_1689253413907/work
+jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1649936611996/work
+jupyterlab-widgets==3.0.8
+jupyterlab_server @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_server_1686659921555/work
+keyring==23.13.1
+kiwisolver==1.4.4
+langchain==0.0.232
+langsmith==0.0.5
+lit==16.0.6
+lockfile==0.12.2
+markdown-it-py==3.0.0
+MarkupSafe @ file:///opt/conda/conda-bld/markupsafe_1654597864307/work
+marshmallow==3.19.0
+marshmallow-enum==1.5.1
+matplotlib==3.7.2
+matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work
+mdurl==0.1.2
+mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1686313613819/work/dist
+more-itertools==9.1.0
+mpmath==1.3.0
+msgpack==1.0.5
+multidict==6.0.4
+multiprocess==0.70.14
+mypy-extensions==1.0.0
+nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1684790896106/work
+nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert-meta_1687202153002/work
+nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1688996247388/work
+networkx==3.1
+notebook_shim @ file:///home/conda/feedstock_root/build_artifacts/notebook-shim_1682360583588/work
+numexpr==2.8.4
+numpy==1.25.1
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-cupti-cu11==11.7.101
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+nvidia-cufft-cu11==10.9.0.58
+nvidia-curand-cu11==10.2.10.91
+nvidia-cusolver-cu11==11.4.0.1
+nvidia-cusparse-cu11==11.7.4.91
+nvidia-nccl-cu11==2.14.3
+nvidia-nvtx-cu11==11.7.91
+openai==0.27.8
+openapi-schema-pydantic==1.2.4
+overrides @ file:///home/conda/feedstock_root/build_artifacts/overrides_1666057828264/work
+packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1681337016113/work
+pandas==2.0.3
+pandocfilters @ file:///home/conda/feedstock_root/build_artifacts/pandocfilters_1631603243851/work
+parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work
+pathtools==0.1.2
+pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1667297516076/work
+pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work
+Pillow==10.0.0
+pkginfo==1.9.6
+pkgutil_resolve_name @ file:///home/conda/feedstock_root/build_artifacts/pkgutil-resolve-name_1633981968097/work
+platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1688739404342/work
+poetry==1.5.1
+poetry-core==1.6.1
+poetry-plugin-export==1.4.0
+prometheus-client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1689032443210/work
+prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1688565951714/work
+protobuf==4.23.4
+psutil==5.9.5
+ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
+pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work
+pyarrow==12.0.1
+pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work
+pydantic==1.10.11
+Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1681904169130/work
+pyparsing==3.0.9
+pyproject_hooks==1.0.0
+pyrsistent @ file:///tmp/build/80754af9/pyrsistent_1636110951836/work
+PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1661604839144/work
+python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work
+python-json-logger @ file:///home/conda/feedstock_root/build_artifacts/python-json-logger_1677079630776/work
+pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1680088766131/work
+PyYAML @ file:///home/conda/feedstock_root/build_artifacts/pyyaml_1648757097602/work
+pyzmq @ file:///croot/pyzmq_1686601365461/work
+rapidfuzz==2.15.1
+regex==2023.6.3
+requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1684774241324/work
+requests-toolbelt==1.0.0
+rfc3339-validator @ file:///home/conda/feedstock_root/build_artifacts/rfc3339-validator_1638811747357/work
+rfc3986-validator @ file:///home/conda/feedstock_root/build_artifacts/rfc3986-validator_1598024191506/work
+rich==13.4.2
+safetensors==0.3.1
+SecretStorage==3.3.3
+Send2Trash @ file:///home/conda/feedstock_root/build_artifacts/send2trash_1682601222253/work
+sentry-sdk==1.28.1
+setproctitle==1.3.2
+shellingham==1.5.0.post1
+six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work
+smmap==5.0.0
+sniffio @ file:///home/conda/feedstock_root/build_artifacts/sniffio_1662051266223/work
+soupsieve @ file:///home/conda/feedstock_root/build_artifacts/soupsieve_1658207591808/work
+SQLAlchemy==2.0.18
+stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work
+sympy==1.12
+tenacity==8.2.2
+terminado @ file:///home/conda/feedstock_root/build_artifacts/terminado_1670253674810/work
+tinycss2 @ file:///home/conda/feedstock_root/build_artifacts/tinycss2_1666100256010/work
+tokenizers==0.13.3
+tomli @ file:///home/conda/feedstock_root/build_artifacts/tomli_1644342247877/work
+tomlkit==0.11.8
+torch==2.0.1
+torchvision==0.15.2
+tornado @ file:///opt/conda/conda-bld/tornado_1662061693373/work
+tqdm==4.65.0
+traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1675110562325/work
+transformers==4.30.2
+triton==2.0.0
+trove-classifiers==2023.7.6
+typing-inspect==0.9.0
+typing-utils @ file:///home/conda/feedstock_root/build_artifacts/typing_utils_1622899189314/work
+typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1688315532570/work
+tzdata==2023.3
+urllib3==1.26.16
+virtualenv==20.23.1
+wandb==0.15.5
+wandb-addons @ file:///home/darek/projects/edu/dlai/wandb-addons
+wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1673864653149/work
+webencodings==0.5.1
+websocket-client @ file:///home/conda/feedstock_root/build_artifacts/websocket-client_1687789148259/work
+widgetsnbextension==4.0.8
+xxhash==3.2.0
+yarl==1.9.2
+zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1689027407711/work

From b3af0c9031cea7e79146c230a00ed6ba995c955f Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Mon, 17 Jul 2023 13:50:27 +0200
Subject: [PATCH 26/43] move ddpm out

---
 dlai/01_diffusion_training.ipynb | 108 ++++++-------------------------
 dlai/utilities.py                |  56 +++++++++++++++-
 2 files changed, 73 insertions(+), 91 deletions(-)

diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index d01015ed..13111df4 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "958524a2-cb56-439e-850e-032dd10478f2",
    "metadata": {},
@@ -41,10 +42,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# wandb.login(relogin=True) # uncomment if you want to login to wandb"
+    "wandb.login(anonymous=\"allow\")"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "7c0d229a",
    "metadata": {},
@@ -70,7 +72,7 @@
     "beta2 = 0.02\n",
     "\n",
     "# network hyperparameters\n",
-    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n",
+    "device = get_device()\n",
     "n_feat = 64 # 64 hidden dimension feature\n",
     "n_cfeat = 5 # context vector is of size 5\n",
     "height = 16 # 16x16 image\n",
@@ -84,7 +86,7 @@
     "n_epoch = 32\n",
     "lrate=1e-3\n",
     "\n",
-    "# we are storing the parameters in a dictionary to be logged to wandb\n",
+    "# we are storing the parameters to be logged to wandb\n",
     "config = SimpleNamespace(\n",
     "    num_samples=num_samples,\n",
     "    timesteps=timesteps,\n",
@@ -103,26 +105,22 @@
   },
   {
    "cell_type": "markdown",
-   "id": "bb43f98f",
+   "id": "9c99dea4",
    "metadata": {},
    "source": [
-    "All this is the same as the previous notebook, except for the addition of the context vector size n_cfeat. We will use this to condition the diffusion model on a context vector."
+    "Set up the DDPM noise scheduler and sampler (same as in the Generative AI course). \n",
+    "- perturb_input: Adds noise to the input image at the corresponding timestep on the schedule\n",
+    "- sample_ddpm_context: Samples from the model using the DDPM sampler; we will use this function during training to sample from the model regularly and see how our training is progressing"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a705d0a8",
-   "metadata": {
-    "tags": []
-   },
+   "id": "6c642e1d",
+   "metadata": {},
    "outputs": [],
    "source": [
-    "# construct DDPM noise schedule\n",
-    "b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1\n",
-    "a_t = 1 - b_t\n",
-    "ab_t = torch.cumsum(a_t.log(), dim=0).exp()    \n",
-    "ab_t[0] = 1"
+    "perturb_input, sample_ddpm_context = setup_ddpm(beta1, beta2, timesteps, device)"
    ]
   },
   {
@@ -152,79 +150,7 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "eb13689d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# helper function: perturbs an image to a specified noise level\n",
-    "def perturb_input(x, t, noise):\n",
-    "    return ab_t.sqrt()[t, None, None, None] * x + (1 - ab_t[t, None, None, None]) * noise"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "fe8eb277",
-   "metadata": {},
-   "source": [
-    "## Sampling"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "45d92c52-8a11-450c-bc78-ffa221af2fa3",
-   "metadata": {},
-   "source": [
-    "We will need to instrument the sampler to have telemetry on the generated images while training!"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8b0f5bed",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# helper function; removes the predicted noise (but adds some noise back in to avoid collapse)\n",
-    "def denoise_add_noise(x, t, pred_noise, z=None):\n",
-    "    if z is None:\n",
-    "        z = torch.randn_like(x)\n",
-    "    noise = b_t.sqrt()[t] * z\n",
-    "    mean = (x - pred_noise * ((1 - a_t[t]) / (1 - ab_t[t]).sqrt())) / a_t[t].sqrt()\n",
-    "    return mean + noise"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "16085a65",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# sample with context using standard algorithm\n",
-    "# we make a change to the original algorithm to allow for context explicitely (the noises)\n",
-    "@torch.no_grad()\n",
-    "def sample_ddpm_context(samples, context, save_rate=20):\n",
-    "    # array to keep track of generated steps for plotting\n",
-    "    intermediate = [] \n",
-    "    for i in range(timesteps, 0, -1):\n",
-    "        # reshape time tensor\n",
-    "        t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n",
-    "\n",
-    "        # sample some random noise to inject back in. For i = 1, don't add back in noise\n",
-    "        z = torch.randn_like(samples) if i > 1 else 0\n",
-    "\n",
-    "        eps = nn_model(samples, t, c=context)    # predict noise e_(x_t,t, ctx)\n",
-    "        samples = denoise_add_noise(samples, i, eps, z)\n",
-    "        if i % save_rate==0 or i==timesteps or i<8:\n",
-    "            intermediate.append(samples.detach().cpu().numpy())\n",
-    "\n",
-    "    intermediate = np.stack(intermediate)\n",
-    "    return samples.clip(-1, 1), intermediate"
-   ]
-  },
-  {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "d9ed46d7",
    "metadata": {},
@@ -233,6 +159,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c",
    "metadata": {},
@@ -263,6 +190,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "26765a7e-4ddc-449e-95c3-54c58a564738",
    "metadata": {},
@@ -325,7 +253,7 @@
     "        wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n",
     "\n",
     "        # sample the model and log the images to W&B\n",
-    "        samples, _ = sample_ddpm_context(noises, ctx_vector[:num_samples])\n",
+    "        samples, _ = sample_ddpm_context(nn_model, noises, ctx_vector[:num_samples])\n",
     "        wandb.log({\"train_samples\": [wandb.Image(img) for img in samples.split(1)]})\n",
     "\n",
     "# finish W&B run\n",
@@ -335,7 +263,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "78ddaed3-8184-4161-a1d6-5af139b336d0",
+   "id": "f676315f",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -357,7 +285,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,
diff --git a/dlai/utilities.py b/dlai/utilities.py
index a57955f4..d080d5c0 100644
--- a/dlai/utilities.py
+++ b/dlai/utilities.py
@@ -1,4 +1,4 @@
-import os
+import os, sys
 import random
 from pathlib import Path
 
@@ -13,6 +13,15 @@
 from torchvision.utils import make_grid, save_image
 
 
+def get_device():
+    "Pick GPU if cuda is available, mps if Mac, else CPU"
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    elif sys.platform == "darwin" and torch.backends.mps.is_available():
+        return torch.device("mps")
+    else:
+        return torch.device("cpu")
+
 def _fig_bounds(x):
     r = x//32
     return min(5, max(1,r))
@@ -387,3 +396,48 @@ def get_dataloaders(data_dir, batch_size, slice_size=None, valid_pct=0.2):
     valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False, num_workers=1)
 
     return train_dl, valid_dl
+
+
+## diffusion functions
+
+def setup_ddpm(beta1, beta2, timesteps, device):
+    # construct DDPM noise schedule and sampling functions
+    b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1
+    a_t = 1 - b_t
+    ab_t = torch.cumsum(a_t.log(), dim=0).exp()    
+    ab_t[0] = 1
+
+    # helper function: perturbs an image to a specified noise level
+    def perturb_input(x, t, noise):
+        return ab_t.sqrt()[t, None, None, None] * x + (1 - ab_t[t, None, None, None]) * noise
+
+    # helper function; removes the predicted noise (but adds some noise back in to avoid collapse)
+    def _denoise_add_noise(x, t, pred_noise, z=None):
+        if z is None:
+            z = torch.randn_like(x)
+        noise = b_t.sqrt()[t] * z
+        mean = (x - pred_noise * ((1 - a_t[t]) / (1 - ab_t[t]).sqrt())) / a_t[t].sqrt()
+        return mean + noise
+
+    # sample with context using standard algorithm
+    # we make a change to the original algorithm to allow for context explicitly (the noises)
+    @torch.no_grad()
+    def sample_ddpm_context(nn_model, noises, context, save_rate=20):
+        # array to keep track of generated steps for plotting
+        intermediate = [] 
+        for i in range(timesteps, 0, -1):
+            # reshape time tensor
+            t = torch.tensor([i / timesteps])[:, None, None, None].to(noises.device)
+
+            # sample some random noise to inject back in. For i = 1, don't add back in noise
+            z = torch.randn_like(noises) if i > 1 else 0
+
+            eps = nn_model(noises, t, c=context)    # predict noise e_(x_t,t, ctx)
+            noises = _denoise_add_noise(noises, i, eps, z)
+            if i % save_rate==0 or i==timesteps or i<8:
+                intermediate.append(noises.detach().cpu().numpy())
+
+        intermediate = np.stack(intermediate)
+        return noises.clip(-1, 1), intermediate
+    
+    return perturb_input, sample_ddpm_context
\ No newline at end of file

From 27be48a9173afcb944e7357ff9a5112288e44929 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Mon, 17 Jul 2023 14:08:43 +0200
Subject: [PATCH 27/43] add tqdm

---
 dlai/utilities.py | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/dlai/utilities.py b/dlai/utilities.py
index d080d5c0..f69a0b9a 100644
--- a/dlai/utilities.py
+++ b/dlai/utilities.py
@@ -7,6 +7,7 @@
 import torch
 import torch.nn as nn
 import torchvision.transforms as transforms
+from tqdm.auto import tqdm
 from matplotlib.animation import FuncAnimation, PillowWriter
 from PIL import Image
 from torch.utils.data import DataLoader, Dataset
@@ -425,7 +426,7 @@ def _denoise_add_noise(x, t, pred_noise, z=None):
     def sample_ddpm_context(nn_model, noises, context, save_rate=20):
         # array to keep track of generated steps for plotting
         intermediate = [] 
-        for i in range(timesteps, 0, -1):
+        for i in tqdm(range(timesteps, 0, -1), leave=False):
             # reshape time tensor
             t = torch.tensor([i / timesteps])[:, None, None, None].to(noises.device)
 
@@ -440,4 +441,42 @@ def sample_ddpm_context(nn_model, noises, context, save_rate=20):
         intermediate = np.stack(intermediate)
         return noises.clip(-1, 1), intermediate
     
-    return perturb_input, sample_ddpm_context
\ No newline at end of file
+    return perturb_input, sample_ddpm_context
+
+
+def setup_ddim(beta1, beta2, timesteps, device):
+    # define sampling function for DDIM   
+    b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1
+    a_t = 1 - b_t
+    ab_t = torch.cumsum(a_t.log(), dim=0).exp()    
+    ab_t[0] = 1
+    # removes the noise using ddim
+    def denoise_ddim(x, t, t_prev, pred_noise):
+        ab = ab_t[t]
+        ab_prev = ab_t[t_prev]
+        
+        x0_pred = ab_prev.sqrt() / ab.sqrt() * (x - (1 - ab).sqrt() * pred_noise)
+        dir_xt = (1 - ab_prev).sqrt() * pred_noise
+
+        return x0_pred + dir_xt
+    
+    # fast sampling algorithm with context
+    @torch.no_grad()
+    def sample_ddim_context(nn_model, noises, context, n=25): 
+        # array to keep track of generated steps for plotting
+        intermediate = [] 
+        step_size = timesteps // n
+        for i in tqdm(range(timesteps, 0, -step_size), leave=False):
+            print(f'sampling timestep {i:3d}', end='\r')
+
+            # reshape time tensor
+            t = torch.tensor([i / timesteps])[:, None, None, None].to(device)
+
+            eps = nn_model(noises, t, c=context)    # predict noise e_(x_t,t)
+            noises = denoise_ddim(noises, i, i - step_size, eps)
+            intermediate.append(noises.detach().cpu().numpy())
+
+        intermediate = np.stack(intermediate)
+        return noises.clip(-1, 1), intermediate
+    
+    return sample_ddim_context
\ No newline at end of file

From deb14d12da86a6a13609067488aa5d40c15e639a Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Mon, 17 Jul 2023 14:12:52 +0200
Subject: [PATCH 28/43] simplify

---
 dlai/02_diffusion_sampling.ipynb | 132 +++++--------------------------
 1 file changed, 18 insertions(+), 114 deletions(-)

diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
index 2dd0c663..93d816ac 100644
--- a/dlai/02_diffusion_sampling.ipynb
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -37,7 +37,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# wandb.login(relogin=True) # uncomment if you want to login to wandb"
+    "wandb.login(anonymous=\"allow\")"
    ]
   },
   {
@@ -69,7 +69,7 @@
     "height = 16\n",
     "ddim_n = 25\n",
     "\n",
-    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n",
+    "device = get_device()\n",
     "\n",
     "# we are storing the parameters in a dictionary to be logged to wandb\n",
     "config = SimpleNamespace(\n",
@@ -88,7 +88,7 @@
    "id": "bb43f98f",
    "metadata": {},
    "source": [
-    "We will load the model from a wandb.Artifact and set up the sampling loop."
+    "In the previous notebook we saved the best model as a wandb Artifact (our way of storing files during runs). We will now load the model from wandb and set up the sampling loop."
    ]
   },
   {
@@ -154,69 +154,11 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "f6f479d1",
+   "id": "146424d3",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# construct DDPM noise schedule\n",
-    "b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1\n",
-    "a_t = 1 - b_t\n",
-    "ab_t = torch.cumsum(a_t.log(), dim=0).exp()    \n",
-    "ab_t[0] = 1"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8b0f5bed",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# helper function; removes the predicted noise (but adds some noise back in to avoid collapse)\n",
-    "def denoise_add_noise(x, t, pred_noise, z=None):\n",
-    "    if z is None:\n",
-    "        z = torch.randn_like(x)\n",
-    "    noise = b_t.sqrt()[t] * z\n",
-    "    mean = (x - pred_noise * ((1 - a_t[t]) / (1 - ab_t[t]).sqrt())) / a_t[t].sqrt()\n",
-    "    return mean + noise"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "895b2bbc-46cd-4be8-a672-e84ef69bc329",
-   "metadata": {},
-   "source": [
-    "sample with context using standard algorithm\n",
-    "we make a change to the original algorithm to allow for context \n",
-    "and pass a fixed noise tensor (samples)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "16085a65",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "@torch.no_grad()\n",
-    "def sample_ddpm_context(samples, context, save_rate=20):\n",
-    "    # array to keep track of generated steps for plotting\n",
-    "    intermediate = [] \n",
-    "    for i in range(timesteps, 0, -1):\n",
-    "        # reshape time tensor\n",
-    "        t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n",
-    "\n",
-    "        # sample some random noise to inject back in. For i = 1, don't add back in noise\n",
-    "        z = torch.randn_like(samples) if i > 1 else 0\n",
-    "\n",
-    "        eps = nn_model(samples, t, c=context)    # predict noise\n",
-    "        samples = denoise_add_noise(samples, i, eps, z)\n",
-    "        if i % save_rate==0 or i==timesteps or i<8:\n",
-    "            print(f'sampling timestep {i:3d}', end='\\r')\n",
-    "            intermediate.append(samples.detach().cpu().numpy())\n",
-    "\n",
-    "    intermediate = np.stack(intermediate)\n",
-    "    return samples.clip(-1, 1), intermediate"
+    "_, sample_ddpm_context = setup_ddpm(beta1, beta2, timesteps, device)"
    ]
   },
   {
@@ -260,49 +202,11 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc",
+   "id": "9c1a945d",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# define sampling function for DDIM   \n",
-    "# removes the noise using ddim\n",
-    "def denoise_ddim(x, t, t_prev, pred_noise):\n",
-    "    ab = ab_t[t]\n",
-    "    ab_prev = ab_t[t_prev]\n",
-    "    \n",
-    "    x0_pred = ab_prev.sqrt() / ab.sqrt() * (x - (1 - ab).sqrt() * pred_noise)\n",
-    "    dir_xt = (1 - ab_prev).sqrt() * pred_noise\n",
-    "\n",
-    "    return x0_pred + dir_xt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5fdfc048-47f0-43b5-983e-da715e1ed562",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# fast sampling algorithm with context\n",
-    "@torch.no_grad()\n",
-    "def sample_ddim_context(samples, context, n=25): \n",
-    "    # array to keep track of generated steps for plotting\n",
-    "    intermediate = [] \n",
-    "    step_size = timesteps // n\n",
-    "    for i in range(timesteps, 0, -step_size):\n",
-    "        print(f'sampling timestep {i:3d}', end='\\r')\n",
-    "\n",
-    "        # reshape time tensor\n",
-    "        t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n",
-    "\n",
-    "        eps = nn_model(samples, t, c=context)    # predict noise e_(x_t,t)\n",
-    "        samples = denoise_ddim(samples, i, i - step_size, eps)\n",
-    "        intermediate.append(samples.detach().cpu().numpy())\n",
-    "\n",
-    "    intermediate = np.stack(intermediate)\n",
-    "    return samples.clip(-1, 1), intermediate"
+    "sample_ddim_context = setup_ddim(beta1, beta2, timesteps, device)"
    ]
   },
   {
@@ -338,7 +242,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ddpm_samples, _ = sample_ddpm_context(noises, ctx_vector)"
+    "ddpm_samples, _ = sample_ddpm_context(nn_model, noises, ctx_vector)"
    ]
   },
   {
@@ -356,7 +260,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ddim_samples, _ = sample_ddim_context(noises, ctx_vector, n=ddim_n)"
+    "ddim_samples, _ = sample_ddim_context(nn_model, noises, ctx_vector, n=ddim_n)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e2eb0969",
+   "metadata": {},
+   "source": [
+    "Let's put the class names as a column on the table"
    ]
   },
   {
@@ -419,14 +331,6 @@
     "with wandb.init(project=PROJECT, job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n",
     "    wandb.log({\"samplers_table\":table})"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a21faa2f-f43a-40c3-9041-7d07d73a358e",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -445,7 +349,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,

From 223a9cc55ddd01c4c63599887253ba9e50c935be Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Mon, 17 Jul 2023 14:13:08 +0200
Subject: [PATCH 29/43] add pbar description

---
 dlai/utilities.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/dlai/utilities.py b/dlai/utilities.py
index f69a0b9a..f61af8eb 100644
--- a/dlai/utilities.py
+++ b/dlai/utilities.py
@@ -426,7 +426,9 @@ def _denoise_add_noise(x, t, pred_noise, z=None):
     def sample_ddpm_context(nn_model, noises, context, save_rate=20):
         # array to keep track of generated steps for plotting
         intermediate = [] 
-        for i in tqdm(range(timesteps, 0, -1), leave=False):
+        for i in (pbar:=tqdm(range(timesteps, 0, -1), leave=False)):
+            pbar.set_description(f'sampling timestep {i:3d}')
+
             # reshape time tensor
             t = torch.tensor([i / timesteps])[:, None, None, None].to(noises.device)
 
@@ -466,8 +468,8 @@ def sample_ddim_context(nn_model, noises, context, n=25):
         # array to keep track of generated steps for plotting
         intermediate = [] 
         step_size = timesteps // n
-        for i in tqdm(range(timesteps, 0, -step_size), leave=False):
-            print(f'sampling timestep {i:3d}', end='\r')
+        for i in (pbar:=tqdm(range(timesteps, 0, -step_size), leave=False)):
+            pbar.set_description(f'sampling timestep {i:3d}')
 
             # reshape time tensor
             t = torch.tensor([i / timesteps])[:, None, None, None].to(device)

From 9f6301c43945b85af2cc041a9ff7022c72bdc52f Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Mon, 17 Jul 2023 14:15:05 +0200
Subject: [PATCH 30/43] rename project

---
 dlai/01_diffusion_training.ipynb | 2 +-
 dlai/02_diffusion_sampling.ipynb | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index 13111df4..920ac728 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -209,7 +209,7 @@
    "source": [
     "# same code as before, added comments on the extra W&B instrumentation lines\n",
     "# create a wandb run\n",
-    "run = wandb.init(project=\"sprite_diffusion\", \n",
+    "run = wandb.init(project=\"dlai_sprite_diffusion\", \n",
     "                 job_type=\"train\", \n",
     "                 anonymous=\"allow\", \n",
     "                 config=config)\n",
diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
index 93d816ac..8f4116c6 100644
--- a/dlai/02_diffusion_sampling.ipynb
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -58,7 +58,7 @@
    "outputs": [],
    "source": [
     "# Wandb Params\n",
-    "PROJECT = \"sprite_diffusion\"\n",
+    "PROJECT = \"dlai_sprite_diffusion\"\n",
     "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:latest\" \n",
     "\n",
     "# ddpm sampler hyperparameters\n",

From 27b45b91345ef54d96e91e3671246323d704aa80 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Tue, 18 Jul 2023 11:23:56 +0200
Subject: [PATCH 31/43] simplify loading from dir

---
 dlai/utilities.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/dlai/utilities.py b/dlai/utilities.py
index f61af8eb..e280e668 100644
--- a/dlai/utilities.py
+++ b/dlai/utilities.py
@@ -349,9 +349,11 @@ def __init__(self, sprites, slabels, transform=default_tfms, null_context=False,
         self.null_context = null_context
 
     @classmethod
-    def from_np(cls, sfilename, lfilename, transform=default_tfms, null_context=False, argmax=False):
-        sprites = np.load(sfilename)
-        slabels = np.load(lfilename)
+    def from_np(cls, 
+                path, 
+                sfilename="sprites_1788_16x16.npy", lfilename="sprite_labels_nc_1788_16x16.npy", transform=default_tfms, null_context=False, argmax=False):
+        sprites = np.load(Path(path)/sfilename)
+        slabels = np.load(Path(path)/lfilename)
         return cls(sprites, slabels, transform, null_context, argmax)
 
     # Return the number of images in the dataset
@@ -426,7 +428,8 @@ def _denoise_add_noise(x, t, pred_noise, z=None):
     def sample_ddpm_context(nn_model, noises, context, save_rate=20):
         # array to keep track of generated steps for plotting
         intermediate = [] 
-        for i in (pbar:=tqdm(range(timesteps, 0, -1), leave=False)):
+        pbar = tqdm(range(timesteps, 0, -1), leave=False)
+        for i in pbar:
             pbar.set_description(f'sampling timestep {i:3d}')
 
             # reshape time tensor
@@ -468,7 +471,8 @@ def sample_ddim_context(nn_model, noises, context, n=25):
         # array to keep track of generated steps for plotting
         intermediate = [] 
         step_size = timesteps // n
-        for i in (pbar:=tqdm(range(timesteps, 0, -step_size), leave=False)):
+        pbar=tqdm(range(timesteps, 0, -step_size), leave=False)
+        for i in pbar:
             pbar.set_description(f'sampling timestep {i:3d}')
 
             # reshape time tensor

From 738d07d202a3c868b73298b6b51cecc3ddb0960c Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Tue, 18 Jul 2023 11:32:48 +0200
Subject: [PATCH 32/43] simplify more, config and formats

---
 dlai/01_diffusion_training.ipynb | 108 ++++++++++++++-----------------
 dlai/02_diffusion_sampling.ipynb |  62 ++++++++----------
 2 files changed, 75 insertions(+), 95 deletions(-)

diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index 920ac728..8c06d2f8 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -1,7 +1,6 @@
 {
  "cells": [
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "958524a2-cb56-439e-850e-032dd10478f2",
    "metadata": {},
@@ -46,7 +45,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "7c0d229a",
    "metadata": {},
@@ -63,44 +61,32 @@
    },
    "outputs": [],
    "source": [
-    "# hyperparameters\n",
-    "num_samples = 30\n",
-    "\n",
-    "# diffusion hyperparameters\n",
-    "timesteps = 500\n",
-    "beta1 = 1e-4\n",
-    "beta2 = 0.02\n",
+    "# we are storing the parameters to be logged to wandb\n",
+    "DATA_DIR = Path('./data/')\n",
+    "SAVE_DIR = Path('./data/weights/')\n",
+    "SAVE_DIR.mkdir(exist_ok=True, parents=True)\n",
     "\n",
-    "# network hyperparameters\n",
-    "device = get_device()\n",
-    "n_feat = 64 # 64 hidden dimension feature\n",
-    "n_cfeat = 5 # context vector is of size 5\n",
-    "height = 16 # 16x16 image\n",
-    "data_dir = Path('./data/')\n",
-    "save_dir = Path('./data/weights/')\n",
-    "save_dir.mkdir(exist_ok=True, parents=True)\n",
+    "config = SimpleNamespace(\n",
+    "    # hyperparameters\n",
+    "    num_samples = 30,\n",
     "\n",
+    "    # diffusion hyperparameters\n",
+    "    timesteps = 500,\n",
+    "    beta1 = 1e-4,\n",
+    "    beta2 = 0.02,\n",
     "\n",
-    "# training hyperparameters\n",
-    "batch_size = 100\n",
-    "n_epoch = 32\n",
-    "lrate=1e-3\n",
+    "    # network hyperparameters\n",
+    "    n_feat = 64, # 64 hidden dimension feature\n",
+    "    n_cfeat = 5, # context vector is of size 5\n",
+    "    height = 16, # 16x16 image\n",
+    "    \n",
+    "    # training hyperparameters\n",
+    "    batch_size = 100,\n",
+    "    n_epoch = 32,\n",
+    "    lrate = 1e-3,\n",
+    ")\n",
     "\n",
-    "# we are storing the parameters to be logged to wandb\n",
-    "config = SimpleNamespace(\n",
-    "    num_samples=num_samples,\n",
-    "    timesteps=timesteps,\n",
-    "    beta1=beta1,\n",
-    "    beta2=beta2,\n",
-    "    device=device,\n",
-    "    n_feat=n_feat,\n",
-    "    n_cfeat=n_cfeat,\n",
-    "    height=height,\n",
-    "    save_dir=save_dir,\n",
-    "    batch_size=batch_size,\n",
-    "    n_epoch=n_epoch,\n",
-    "    lrate=lrate,\n",
-    ")"
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\""
    ]
   },
   {
@@ -120,7 +106,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "perturb_input, sample_ddpm_context = setup_ddpm(beta1, beta2, timesteps, device)"
+    "perturb_input, sample_ddpm_context = setup_ddpm(config.beta1, \n",
+    "                                                config.beta2, \n",
+    "                                                config.timesteps, \n",
+    "                                                device)"
    ]
   },
   {
@@ -133,7 +122,11 @@
    "outputs": [],
    "source": [
     "# construct model\n",
-    "nn_model = ContextUnet(in_channels=3, n_feat=n_feat, n_cfeat=n_cfeat, height=height).to(device)"
+    "nn_model = ContextUnet(\n",
+    "    in_channels=3, \n",
+    "    n_feat=config.n_feat, \n",
+    "    n_cfeat=config.n_cfeat, \n",
+    "    height=config.height).to(device)"
    ]
   },
   {
@@ -144,13 +137,15 @@
    "outputs": [],
    "source": [
     "# load dataset and construct optimizer\n",
-    "dataset = CustomDataset.from_np(data_dir/\"sprites_1788_16x16.npy\", data_dir/\"sprite_labels_nc_1788_16x16.npy\")\n",
-    "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=1)\n",
-    "optim = torch.optim.Adam(nn_model.parameters(), lr=lrate, eps=1e-5)"
+    "dataset = CustomDataset.from_np(path=DATA_DIR)\n",
+    "dataloader = DataLoader(dataset, \n",
+    "                        batch_size=config.batch_size, \n",
+    "                        shuffle=True, \n",
+    "                        num_workers=1)\n",
+    "optim = torch.optim.Adam(nn_model.parameters(), lr=config.lrate)"
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "d9ed46d7",
    "metadata": {},
@@ -159,7 +154,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c",
    "metadata": {},
@@ -178,7 +172,7 @@
    "source": [
     "# Noise vector\n",
     "# x_T ~ N(0, 1), sample initial noise\n",
-    "noises = torch.randn(num_samples, 3, height, height).to(device)  \n",
+    "noises = torch.randn(config.num_samples, 3, config.height, config.height).to(device)  \n",
     "\n",
     "# A fixed context vector to sample from\n",
     "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n",
@@ -190,7 +184,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "26765a7e-4ddc-449e-95c3-54c58a564738",
    "metadata": {},
@@ -214,10 +207,13 @@
     "                 anonymous=\"allow\", \n",
     "                 config=config)\n",
     "\n",
-    "for ep in tqdm(range(n_epoch), leave=True, total=n_epoch):\n",
+    "# we pass the config back from W&B\n",
+    "config = wandb.config\n",
+    "\n",
+    "for ep in tqdm(range(config.n_epoch), leave=True, total=config.n_epoch):\n",
     "    # set into train mode\n",
     "    nn_model.train()\n",
-    "    optim.param_groups[0]['lr'] = lrate*(1-ep/n_epoch)\n",
+    "    optim.param_groups[0]['lr'] = config.lrate*(1-ep/config.n_epoch)\n",
     "    \n",
     "    pbar = tqdm(dataloader, leave=False)\n",
     "    for x, c in pbar:   # x: images  c: context\n",
@@ -227,9 +223,9 @@
     "        context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(device)\n",
     "        c = c * context_mask.unsqueeze(-1)        \n",
     "        noise = torch.randn_like(x)\n",
-    "        t = torch.randint(1, timesteps + 1, (x.shape[0],)).to(device) \n",
+    "        t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(device) \n",
     "        x_pert = perturb_input(x, t, noise)      \n",
-    "        pred_noise = nn_model(x_pert, t / timesteps, c=c)      \n",
+    "        pred_noise = nn_model(x_pert, t / config.timesteps, c=c)      \n",
     "        loss = F.mse_loss(pred_noise, noise)\n",
     "        loss.backward()    \n",
     "        optim.step()\n",
@@ -242,7 +238,7 @@
     "    # save model periodically\n",
     "    if ep%4==0 or ep == int(n_epoch-1):\n",
     "        nn_model.eval()\n",
-    "        ckpt_file = save_dir/f\"context_model.pth\"\n",
+    "        ckpt_file = SAVE_DIR/f\"context_model.pth\"\n",
     "        torch.save(nn_model.state_dict(), ckpt_file)\n",
     "\n",
     "        # save model to wandb as an Artifact\n",
@@ -253,25 +249,17 @@
     "        wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n",
     "\n",
     "        # sample the model and log the images to W&B\n",
-    "        samples, _ = sample_ddpm_context(nn_model, noises, ctx_vector[:num_samples])\n",
+    "        samples, _ = sample_ddpm_context(nn_model, noises, ctx_vector[:config.num_samples])\n",
     "        wandb.log({\"train_samples\": [wandb.Image(img) for img in samples.split(1)]})\n",
     "\n",
     "# finish W&B run\n",
     "wandb.finish()"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f676315f",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
index 8f4116c6..8396b280 100644
--- a/dlai/02_diffusion_sampling.ipynb
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -58,29 +58,25 @@
    "outputs": [],
    "source": [
     "# Wandb Params\n",
-    "PROJECT = \"dlai_sprite_diffusion\"\n",
     "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:latest\" \n",
     "\n",
-    "# ddpm sampler hyperparameters\n",
-    "timesteps = 500\n",
-    "beta1 = 1e-4\n",
-    "beta2 = 0.02\n",
-    "num_samples = 30\n",
-    "height = 16\n",
-    "ddim_n = 25\n",
-    "\n",
-    "device = get_device()\n",
-    "\n",
-    "# we are storing the parameters in a dictionary to be logged to wandb\n",
     "config = SimpleNamespace(\n",
-    "    timesteps=timesteps,\n",
-    "    beta1=beta1,\n",
-    "    beta2=beta2,\n",
-    "    num_samples=num_samples,\n",
-    "    height=height,\n",
-    "    ddim_n=ddim_n,\n",
-    "    device=device,\n",
-    ")"
+    "    # hyperparameters\n",
+    "    num_samples = 30,\n",
+    "    \n",
+    "    # ddpm sampler hyperparameters\n",
+    "    timesteps = 500,\n",
+    "    beta1 = 1e-4,\n",
+    "    beta2 = 0.02,\n",
+    "    \n",
+    "    # ddim sampler hp\n",
+    "    ddim_n = 25,\n",
+    "    \n",
+    "    # network hyperparameters\n",
+    "    height = 16,\n",
+    ")\n",
+    "\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\""
    ]
   },
   {
@@ -158,7 +154,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "_, sample_ddpm_context = setup_ddpm(beta1, beta2, timesteps, device)"
+    "_, sample_ddpm_context = setup_ddpm(config.beta1, \n",
+    "                                    config.beta2, \n",
+    "                                    config.timesteps, \n",
+    "                                    device)"
    ]
   },
   {
@@ -180,7 +179,7 @@
    "source": [
     "# Noise vector\n",
     "# x_T ~ N(0, 1), sample initial noise\n",
-    "noises = torch.randn(num_samples, 3, height, height).to(device)  \n",
+    "noises = torch.randn(config.num_samples, 3, config.height, config.height).to(device)  \n",
     "\n",
     "# A fixed context vector to sample from\n",
     "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n",
@@ -206,7 +205,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "sample_ddim_context = setup_ddim(beta1, beta2, timesteps, device)"
+    "sample_ddim_context = setup_ddim(config.beta1, config.beta2, config.timesteps, device)"
    ]
   },
   {
@@ -232,6 +231,7 @@
    "id": "90b838be-8fa1-4c12-9c4f-e40dfacc08e1",
    "metadata": {},
    "source": [
+    "### Sampling:\n",
     "let's compute ddpm samples as before"
    ]
   },
@@ -260,7 +260,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ddim_samples, _ = sample_ddim_context(nn_model, noises, ctx_vector, n=ddim_n)"
+    "ddim_samples, _ = sample_ddim_context(nn_model, noises, ctx_vector, n=config.ddim_n)"
    ]
   },
   {
@@ -283,14 +283,6 @@
     "    return [classes[i] for i in ctx_vector.argmax(dim=1)]"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "daea8275-0356-452e-a9f9-2824ef53f1ea",
-   "metadata": {},
-   "source": [
-    "Let's keep track of the sampling params on a dictionary"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "85be303d-0f0b-4df4-8c87-bd1bfb6145a2",
@@ -307,7 +299,7 @@
    "outputs": [],
    "source": [
     "for noise, ddpm_sample, ddim_sample, c in zip(noises, ddpm_samples, ddim_samples, ctx_to_classes(ctx_vector)):\n",
-    "    table.add_data(wandb.Image(noise), \n",
+    "    table.add_data(wandb.Image(noise), # we log the input noise to analyse the impact\n",
     "                   wandb.Image(ddpm_sample), \n",
     "                   wandb.Image(ddim_sample),\n",
     "                   c)"
@@ -328,14 +320,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "with wandb.init(project=PROJECT, job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n",
+    "with wandb.init(project=\"dlai_sprite_diffusion\", job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n",
     "    wandb.log({\"samplers_table\":table})"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },

From 405a0cbe529aa28650551da38b41d5ff9b9fb248 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Tue, 18 Jul 2023 12:06:05 +0200
Subject: [PATCH 33/43] extra tidy up

---
 dlai/01_diffusion_training.ipynb | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index 8c06d2f8..87302ad5 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -5,11 +5,12 @@
    "id": "958524a2-cb56-439e-850e-032dd10478f2",
    "metadata": {},
    "source": [
-    "# Training a Diffusion Model with W&B\n",
+    "# Training a Diffusion Model with Weights and Biases (W&B)\n",
     "\n",
-    "In this notebooks we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook and add:\n",
-    "- Logging of the training loss\n",
-    "- Sampling from the model during training and logging the samples to W&B\n",
+    "In this notebooks we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook from the [\"How diffusion models works\"](https://www.deeplearning.ai/short-courses/how-diffusion-models-work/) course. \n",
+    "We will add:\n",
+    "- Logging of the training loss and metrics\n",
+    "- Sampling from the model during training and uploading the samples to W&B\n",
     "- Saving the model checkpoints to W&B"
    ]
   },
@@ -34,6 +35,14 @@
     "import wandb"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "8969ab86-bd9b-475d-96e2-b913b42dec14",
+   "metadata": {},
+   "source": [
+    "We encourage you to create an account to get the full user experience from W&B"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -94,7 +103,7 @@
    "id": "9c99dea4",
    "metadata": {},
    "source": [
-    "setup DDPM noise scheduler and sampler (same as in the generative Ai course). \n",
+    "### Setup DDPM noise scheduler and sampler (same as in the Diffusion course). \n",
     "- perturb_input: Adds noise to the input image at the corresponding timestep on the schedule\n",
     "- sample_ddpm_context: Samples from the model using the DDPM sampler, we will use this function during training to sample from the model regularly and see how our training is progressing"
    ]
@@ -158,7 +167,7 @@
    "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c",
    "metadata": {},
    "source": [
-    "we choose a fixed context vector with 6 of each class, this way we know what to expect on the workspace."
+    "We choose a fixed context vector with 6 samples of each class to guide our diffusion"
    ]
   },
   {
@@ -200,7 +209,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# same code as before, added comments on the extra W&B instrumentation lines\n",
     "# create a wandb run\n",
     "run = wandb.init(project=\"dlai_sprite_diffusion\", \n",
     "                 job_type=\"train\", \n",

From d7c70297d66e53576f85e3eabafe2ab1f00d4343 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Tue, 18 Jul 2023 13:47:34 +0200
Subject: [PATCH 34/43] fix loading CustomDataset

---
 dlai/utilities.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/dlai/utilities.py b/dlai/utilities.py
index e280e668..f6d8269a 100644
--- a/dlai/utilities.py
+++ b/dlai/utilities.py
@@ -386,10 +386,7 @@ def split(self, pct=0.2):
 
 def get_dataloaders(data_dir, batch_size, slice_size=None, valid_pct=0.2):
     "Get train/val dataloaders for classification on sprites dataset"
-    dataset = CustomDataset.from_np(Path(data_dir)/"sprites_1788_16x16.npy", 
-                                    Path(data_dir)/"sprite_labels_nc_1788_16x16.npy",
-                                    argmax=True)
-
+    dataset = CustomDataset.from_np(Path(data_dir), argmax=True)
     if slice_size:
         dataset = dataset.subset(slice_size)
 

From c1172ded4dc589d9f7e212bfb0fca0adc19f5511 Mon Sep 17 00:00:00 2001
From: kldarek <darek.kleczek@gmail.com>
Date: Tue, 18 Jul 2023 13:53:19 +0200
Subject: [PATCH 35/43] simplification

---
 dlai/00_intro.ipynb | 54 ++++++++++-----------------------------------
 1 file changed, 12 insertions(+), 42 deletions(-)

diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb
index 54b61396..3e32c75d 100644
--- a/dlai/00_intro.ipynb
+++ b/dlai/00_intro.ipynb
@@ -20,15 +20,12 @@
     "import math\n",
     "from pathlib import Path\n",
     "from types import SimpleNamespace\n",
-    "import wandb\n",
-    "\n",
     "from tqdm.auto import tqdm\n",
     "import torch\n",
     "import torch.nn as nn\n",
     "import torch.nn.functional as F\n",
     "from torch.optim import Adam\n",
-    "\n",
-    "from utilities import *"
+    "from utilities import get_dataloaders"
    ]
   },
   {
@@ -95,12 +92,7 @@
    "source": [
     "def train_model(config):\n",
     "    \"Train a model with a given config\"\n",
-    "    # Start a wandb run\n",
-    "    wandb.init(\n",
-    "        project=\"dlai-intro\",\n",
-    "        config=config,\n",
-    "        anonymous=\"allow\",\n",
-    "    )\n",
+    "    \n",
     "    # Get the data\n",
     "    train_dl, valid_dl = get_dataloaders(DATA_DIR, \n",
     "                                         config.batch_size, \n",
@@ -132,25 +124,17 @@
     "            example_ct += len(images)\n",
     "            metrics = {\n",
     "                \"train/train_loss\": train_loss,\n",
-    "                \"train/epoch\": (step + 1 + (n_steps_per_epoch * epoch))/n_steps_per_epoch,\n",
+    "                \"train/epoch\": epoch + 1,\n",
     "                \"train/example_ct\": example_ct\n",
     "            }\n",
-    "            # Log train metrics to wandb\n",
-    "            wandb.log(metrics)\n",
     "            \n",
     "        # Compute validation metrics, log images on last epoch\n",
-    "        val_loss, accuracy = validate_model(model, valid_dl, loss_func, \n",
-    "                                            log_images=(epoch == (config.epochs - 1)))\n",
-    "\n",
-    "        # Log train and validation metrics to wandb\n",
+    "        val_loss, accuracy = validate_model(model, valid_dl, loss_func)\n",
+    "        # Compute train and validation metrics\n",
     "        val_metrics = {\n",
     "            \"val/val_loss\": val_loss,\n",
     "            \"val/val_accuracy\": accuracy\n",
-    "        }\n",
-    "        # Log validation metrics to wandb\n",
-    "        wandb.log(val_metrics)\n",
-    "\n",
-    "    wandb.finish()"
+    "        }\n"
    ]
   },
   {
@@ -160,8 +144,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def validate_model(model, valid_dl, loss_func, log_images=False, batch_idx=0):\n",
-    "    \"Compute the performance of the model on the validation dataset and log a wandb.Table\"\n",
+    "def validate_model(model, valid_dl, loss_func):\n",
+    "    \"Compute the performance of the model on the validation dataset\"\n",
     "    model.eval()\n",
     "    val_loss = 0.0\n",
     "    correct = 0\n",
@@ -177,22 +161,8 @@
     "            # Compute accuracy and accumulate\n",
     "            _, predicted = torch.max(outputs.data, 1)\n",
     "            correct += (predicted == labels).sum().item()\n",
-    "\n",
-    "            # Log one batch of images to the dashboard, always same batch_idx.\n",
-    "            if i == batch_idx and log_images:\n",
-    "                log_image_predictions_table(images, predicted, labels, outputs.softmax(dim=1))\n",
-    "\n",
-    "    return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n",
-    "\n",
-    "def log_image_predictions_table(images, predicted, labels, probs):\n",
-    "    \"Create a wandb Table to log images, labels, and predictions\"\n",
-    "    columns = [\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)]\n",
-    "    table = wandb.Table(columns=columns)\n",
-    "    \n",
-    "    for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n",
-    "        table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n",
-    "    \n",
-    "    wandb.log({\"predictions_table\": table}, commit=False)\n"
+    "            \n",
+    "    return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n"
    ]
   },
   {
@@ -211,7 +181,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# wandb.login() # uncomment if you want to login to wandb"
+    "wandb.login(anonymous=\"allow\")"
    ]
   },
   {
@@ -289,7 +259,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.8.13"
   }
  },
  "nbformat": 4,

From 3df314fd5e125ca46595aebfa74636123e4a7303 Mon Sep 17 00:00:00 2001
From: kldarek <darek.kleczek@gmail.com>
Date: Tue, 18 Jul 2023 14:29:30 +0200
Subject: [PATCH 36/43] simplification2

---
 dlai/00_intro.ipynb | 51 ++++++++++++++-------------------------------
 1 file changed, 16 insertions(+), 35 deletions(-)

diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb
index 3e32c75d..3d4b0a8c 100644
--- a/dlai/00_intro.ipynb
+++ b/dlai/00_intro.ipynb
@@ -25,7 +25,9 @@
     "import torch.nn as nn\n",
     "import torch.nn.functional as F\n",
     "from torch.optim import Adam\n",
-    "from utilities import get_dataloaders"
+    "from utilities import get_dataloaders\n",
+    "\n",
+    "import wandb"
    ]
   },
   {
@@ -92,7 +94,11 @@
    "source": [
     "def train_model(config):\n",
     "    \"Train a model with a given config\"\n",
-    "    \n",
+    "    # Start a wandb run\n",
+    "    wandb.init(\n",
+    "        project=\"dlai-intro\",\n",
+    "        config=config,\n",
+    "    )\n",
     "    # Get the data\n",
     "    train_dl, valid_dl = get_dataloaders(DATA_DIR, \n",
     "                                         config.batch_size, \n",
@@ -127,6 +133,8 @@
     "                \"train/epoch\": epoch + 1,\n",
     "                \"train/example_ct\": example_ct\n",
     "            }\n",
+    "            # log training metrics to wandb\n",
+    "            wandb.log(metrics)\n",
     "            \n",
     "        # Compute validation metrics, log images on last epoch\n",
     "        val_loss, accuracy = validate_model(model, valid_dl, loss_func)\n",
@@ -134,7 +142,11 @@
     "        val_metrics = {\n",
     "            \"val/val_loss\": val_loss,\n",
     "            \"val/val_accuracy\": accuracy\n",
-    "        }\n"
+    "        }\n",
+    "        # log validation metrics to wandb\n",
+    "        wandb.log(val_metrics)\n",
+    "    \n",
+    "    wandb.finish()\n"
    ]
   },
   {
@@ -206,38 +218,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6d8af6b3-fdec-4f46-90b4-28585257e9cd",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "config.epochs = 3\n",
-    "train_model(config)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "e9ecf01d",
-   "metadata": {},
-   "source": [
-    "Let's try other values of hyperparameters:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "593f7b8d-216c-4b51-a389-eaae195e5e74",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "config.epochs = 1\n",
-    "config.lr = 1e-4\n",
-    "train_model(config)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9d83ea0a",
+   "id": "2e7c186f",
    "metadata": {},
    "outputs": [],
    "source": []

From 465b872838a29047ff44a17de78988e5c8d434f4 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Tue, 18 Jul 2023 17:33:07 +0200
Subject: [PATCH 37/43] split into instruct

---
 dlai/01_diffusion_training-instructor.ipynb | 300 ++++++++++++++++++++
 dlai/01_diffusion_training.ipynb            |  30 +-
 dlai/utilities.py                           |   6 +-
 3 files changed, 315 insertions(+), 21 deletions(-)
 create mode 100644 dlai/01_diffusion_training-instructor.ipynb

diff --git a/dlai/01_diffusion_training-instructor.ipynb b/dlai/01_diffusion_training-instructor.ipynb
new file mode 100644
index 00000000..9010b2d3
--- /dev/null
+++ b/dlai/01_diffusion_training-instructor.ipynb
@@ -0,0 +1,300 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "958524a2-cb56-439e-850e-032dd10478f2",
+   "metadata": {},
+   "source": [
+    "# Training a Diffusion Model with Weights and Biases (W&B)\n",
+    "\n",
+    "In this notebook we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook from the [\"How diffusion models work\"](https://www.deeplearning.ai/short-courses/how-diffusion-models-work/) course. \n",
+    "We will add:\n",
+    "- Logging of the training loss and metrics\n",
+    "- Sampling from the model during training and uploading the samples to W&B\n",
+    "- Saving the model checkpoints to W&B"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "700e687c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from types import SimpleNamespace\n",
+    "from pathlib import Path\n",
+    "from tqdm.notebook import tqdm\n",
+    "import torch\n",
+    "import torch.nn.functional as F\n",
+    "from torch.utils.data import DataLoader\n",
+    "import numpy as np\n",
+    "from utilities import *\n",
+    "\n",
+    "import wandb"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8969ab86-bd9b-475d-96e2-b913b42dec14",
+   "metadata": {},
+   "source": [
+    "We encourage you to create an account to get the full user experience from W&B"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b88f9513",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wandb.login(anonymous=\"allow\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7c0d229a",
+   "metadata": {},
+   "source": [
+    "## Setting Things Up"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d76c167-7122-4f88-9c9f-5ded96684fa5",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# we are storing the parameters to be logged to wandb\n",
+    "DATA_DIR = Path('./data/')\n",
+    "SAVE_DIR = Path('./data/weights/')\n",
+    "SAVE_DIR.mkdir(exist_ok=True, parents=True)\n",
+    "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "\n",
+    "config = SimpleNamespace(\n",
+    "    # hyperparameters\n",
+    "    num_samples = 30,\n",
+    "\n",
+    "    # diffusion hyperparameters\n",
+    "    timesteps = 500,\n",
+    "    beta1 = 1e-4,\n",
+    "    beta2 = 0.02,\n",
+    "\n",
+    "    # network hyperparameters\n",
+    "    n_feat = 64, # 64 hidden dimension feature\n",
+    "    n_cfeat = 5, # context vector is of size 5\n",
+    "    height = 16, # 16x16 image\n",
+    "    \n",
+    "    # training hyperparameters\n",
+    "    batch_size = 100,\n",
+    "    n_epoch = 32,\n",
+    "    lrate = 1e-3,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9c99dea4",
+   "metadata": {},
+   "source": [
+    "### Setup DDPM noise scheduler and sampler (same as in the Diffusion course). \n",
+    "- perturb_input: Adds noise to the input image at the corresponding timestep on the schedule\n",
+    "- sample_ddpm_context: Generate images using the DDPM sampler, we will use this function during training to sample from the model regularly and see how our training is progressing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6c642e1d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# setup ddpm sampler functions\n",
+    "perturb_input, sample_ddpm_context = setup_ddpm(config.beta1, \n",
+    "                                                config.beta2, \n",
+    "                                                config.timesteps, \n",
+    "                                                DEVICE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6bc9001e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# construct model\n",
+    "nn_model = ContextUnet(\n",
+    "    in_channels=3, \n",
+    "    n_feat=config.n_feat, \n",
+    "    n_cfeat=config.n_cfeat, \n",
+    "    height=config.height).to(DEVICE)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "76c63b85",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load dataset and construct optimizer\n",
+    "dataset = CustomDataset.from_np(path=DATA_DIR)\n",
+    "dataloader = DataLoader(dataset, \n",
+    "                        batch_size=config.batch_size, \n",
+    "                        shuffle=True, \n",
+    "                        num_workers=1)\n",
+    "optim = torch.optim.Adam(nn_model.parameters(), lr=config.lrate)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d9ed46d7",
+   "metadata": {},
+   "source": [
+    "## Training"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c",
+   "metadata": {},
+   "source": [
+    "We choose a fixed context vector with 6 samples of each class to guide our diffusion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d88afdba",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Noise vector\n",
+    "# x_T ~ N(0, 1), sample initial noise\n",
+    "noises = torch.randn(config.num_samples, 3, \n",
+    "                     config.height, config.height).to(DEVICE)  \n",
+    "\n",
+    "# A fixed context vector to sample from\n",
+    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n",
+    "                                     1,1,1,1,1,1,\n",
+    "                                     2,2,2,2,2,2,\n",
+    "                                     3,3,3,3,3,3,\n",
+    "                                     4,4,4,4,4,4]), \n",
+    "                       5).to(DEVICE).float()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "26765a7e-4ddc-449e-95c3-54c58a564738",
+   "metadata": {},
+   "source": [
+    "The following training cell takes a very long time to run on CPU, so we have already trained the model for you on a GPU-equipped machine.\n",
+    "\n",
+    "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua) <<"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f5f4af69",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create a wandb run\n",
+    "run = wandb.init(project=\"dlai_sprite_diffusion\", \n",
+    "                 job_type=\"train\", \n",
+    "                 anonymous=\"allow\", \n",
+    "                 config=config)\n",
+    "\n",
+    "# we pass the config back from W&B\n",
+    "config = wandb.config\n",
+    "\n",
+    "for ep in tqdm(range(config.n_epoch), leave=True, total=config.n_epoch):\n",
+    "    # set into train mode and linearly decay the learning rate\n",
+    "    nn_model.train()\n",
+    "    optim.param_groups[0]['lr'] = config.lrate*(1-ep/config.n_epoch)\n",
+    "    \n",
+    "    pbar = tqdm(dataloader, leave=False)\n",
+    "    for x, c in pbar:   # x: images  c: context\n",
+    "        optim.zero_grad()\n",
+    "        x = x.to(DEVICE)\n",
+    "        c = c.to(x)   \n",
+    "        context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(DEVICE)\n",
+    "        c = c * context_mask.unsqueeze(-1)        \n",
+    "        noise = torch.randn_like(x)\n",
+    "        t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(DEVICE) \n",
+    "        x_pert = perturb_input(x, t, noise)      \n",
+    "        pred_noise = nn_model(x_pert, t / config.timesteps, c=c)      \n",
+    "        loss = F.mse_loss(pred_noise, noise)\n",
+    "        loss.backward()    \n",
+    "        optim.step()\n",
+    "\n",
+    "        # we log the relevant metrics to the workspace\n",
+    "        wandb.log({\"loss\": loss.item(),\n",
+    "                   \"lr\": optim.param_groups[0]['lr'],\n",
+    "                   \"epoch\": ep})\n",
+    "\n",
+    "    # save model every 4 epochs and on the last epoch\n",
+    "    if ep%4==0 or ep == int(config.n_epoch-1):\n",
+    "        nn_model.eval()\n",
+    "        ckpt_file = SAVE_DIR/f\"context_model.pth\"\n",
+    "        torch.save(nn_model.state_dict(), ckpt_file)\n",
+    "        \n",
+    "        \n",
+    "        ###########################################################\n",
+    "        \n",
+    "        # save model to wandb as an Artifact\n",
+    "        artifact_name = f\"{wandb.run.id}_context_model\"\n",
+    "        at = wandb.Artifact(artifact_name, type=\"model\")\n",
+    "        at.add_file(ckpt_file)\n",
+    "        wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n",
+    "        \n",
+    "        ###########################################################\n",
+    "        \n",
+    "        # sample the model and log the images to W&B\n",
+    "        samples, _ = sample_ddpm_context(nn_model, \n",
+    "                                         noises, \n",
+    "                                         ctx_vector[:config.num_samples])\n",
+    "        wandb.log({\n",
+    "            \"train_samples\": [\n",
+    "                wandb.Image(img) for img in samples.split(1)\n",
+    "            ]})\n",
+    "        \n",
+    "        ###########################################################\n",
+    "# finish W&B run\n",
+    "wandb.finish()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index 87302ad5..f8e87ae3 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -7,7 +7,7 @@
    "source": [
     "# Training a Diffusion Model with Weights and Biases (W&B)\n",
     "\n",
-    "In this notebooks we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook from the [\"How diffusion models works\"](https://www.deeplearning.ai/short-courses/how-diffusion-models-work/) course. \n",
+    "In this notebook we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook from the [\"How diffusion models work\"](https://www.deeplearning.ai/short-courses/how-diffusion-models-work/) course. \n",
     "We will add:\n",
     "- Logging of the training loss and metrics\n",
     "- Sampling from the model during training and uploading the samples to W&B\n",
@@ -74,6 +74,7 @@
     "DATA_DIR = Path('./data/')\n",
     "SAVE_DIR = Path('./data/weights/')\n",
     "SAVE_DIR.mkdir(exist_ok=True, parents=True)\n",
+    "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
     "\n",
     "config = SimpleNamespace(\n",
     "    # hyperparameters\n",
@@ -93,9 +94,7 @@
     "    batch_size = 100,\n",
     "    n_epoch = 32,\n",
     "    lrate = 1e-3,\n",
-    ")\n",
-    "\n",
-    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\""
+    ")"
    ]
   },
   {
@@ -105,7 +104,7 @@
    "source": [
     "### Setup DDPM noise scheduler and sampler (same as in the Diffusion course). \n",
     "- perturb_input: Adds noise to the input image at the corresponding timestep on the schedule\n",
-    "- sample_ddpm_context: Samples from the model using the DDPM sampler, we will use this function during training to sample from the model regularly and see how our training is progressing"
+    "- sample_ddpm_context: Generate images using the DDPM sampler, we will use this function during training to sample from the model regularly and see how our training is progressing"
    ]
   },
   {
@@ -115,10 +114,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# setup ddpm sampler functions\n",
     "perturb_input, sample_ddpm_context = setup_ddpm(config.beta1, \n",
     "                                                config.beta2, \n",
     "                                                config.timesteps, \n",
-    "                                                device)"
+    "                                                DEVICE)"
    ]
   },
   {
@@ -135,7 +135,7 @@
     "    in_channels=3, \n",
     "    n_feat=config.n_feat, \n",
     "    n_cfeat=config.n_cfeat, \n",
-    "    height=config.height).to(device)"
+    "    height=config.height).to(DEVICE)"
    ]
   },
   {
@@ -181,7 +181,8 @@
    "source": [
     "# Noise vector\n",
     "# x_T ~ N(0, 1), sample initial noise\n",
-    "noises = torch.randn(config.num_samples, 3, config.height, config.height).to(device)  \n",
+    "noises = torch.randn(config.num_samples, 3, \n",
+    "                     config.height, config.height).to(DEVICE)  \n",
     "\n",
     "# A fixed context vector to sample from\n",
     "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n",
@@ -189,7 +190,7 @@
     "                                     2,2,2,2,2,2,\n",
     "                                     3,3,3,3,3,3,\n",
     "                                     4,4,4,4,4,4]), \n",
-    "                       5).to(device=device).float()"
+    "                       5).to(DEVICE).float()"
    ]
   },
   {
@@ -249,17 +250,6 @@
     "        ckpt_file = SAVE_DIR/f\"context_model.pth\"\n",
     "        torch.save(nn_model.state_dict(), ckpt_file)\n",
     "\n",
-    "        # save model to wandb as an Artifact\n",
-    "        artifact_name = f\"{wandb.run.id}_context_model\"\n",
-    "        at = wandb.Artifact(artifact_name, type=\"model\", \n",
-    "                            metadata={\"loss\":loss.item(), \"epoch\":ep})\n",
-    "        at.add_file(ckpt_file)\n",
-    "        wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n",
-    "\n",
-    "        # sample the model and log the images to W&B\n",
-    "        samples, _ = sample_ddpm_context(nn_model, noises, ctx_vector[:config.num_samples])\n",
-    "        wandb.log({\"train_samples\": [wandb.Image(img) for img in samples.split(1)]})\n",
-    "\n",
     "# finish W&B run\n",
     "wandb.finish()"
    ]
diff --git a/dlai/utilities.py b/dlai/utilities.py
index f6d8269a..a11ea3b8 100644
--- a/dlai/utilities.py
+++ b/dlai/utilities.py
@@ -482,4 +482,8 @@ def sample_ddim_context(nn_model, noises, context, n=25):
         intermediate = np.stack(intermediate)
         return noises.clip(-1, 1), intermediate
     
-    return sample_ddim_context
\ No newline at end of file
+    return sample_ddim_context
+
+def to_classes(ctx_vector):
+    classes = "hero,non-hero,food,spell,side-facing".split(",")
+    return [classes[i] for i in ctx_vector.argmax(dim=1)]
\ No newline at end of file

From 475391378c792ea39dfa75e13de6808249567a2b Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Tue, 18 Jul 2023 17:33:13 +0200
Subject: [PATCH 38/43] cleanup

---
 dlai/00_intro.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb
index 3d4b0a8c..632292f2 100644
--- a/dlai/00_intro.ipynb
+++ b/dlai/00_intro.ipynb
@@ -226,7 +226,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },

From d25e2da518fe423906b3dfc4a91db8a1aaf237dc Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Tue, 18 Jul 2023 17:33:23 +0200
Subject: [PATCH 39/43] shorter lines

---
 dlai/02_diffusion_sampling.ipynb | 75 +++++++++++++++-----------------
 1 file changed, 35 insertions(+), 40 deletions(-)

diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
index 8396b280..a814b291 100644
--- a/dlai/02_diffusion_sampling.ipynb
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -20,8 +20,8 @@
    },
    "outputs": [],
    "source": [
-    "from types import SimpleNamespace\n",
     "from pathlib import Path\n",
+    "from types import SimpleNamespace\n",
     "import torch\n",
     "import torch.nn.functional as F\n",
     "import numpy as np\n",
@@ -58,7 +58,8 @@
    "outputs": [],
    "source": [
     "# Wandb Params\n",
-    "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:latest\" \n",
+    "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:staging\" \n",
+    "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
     "\n",
     "config = SimpleNamespace(\n",
     "    # hyperparameters\n",
@@ -74,9 +75,7 @@
     "    \n",
     "    # network hyperparameters\n",
     "    height = 16,\n",
-    ")\n",
-    "\n",
-    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\""
+    ")"
    ]
   },
   {
@@ -118,7 +117,7 @@
     "\n",
     "    # set the model to eval mode\n",
     "    model.eval()\n",
-    "    return model.to(device)"
+    "    return model.to(DEVICE)"
    ]
   },
   {
@@ -157,7 +156,7 @@
     "_, sample_ddpm_context = setup_ddpm(config.beta1, \n",
     "                                    config.beta2, \n",
     "                                    config.timesteps, \n",
-    "                                    device)"
+    "                                    DEVICE)"
    ]
   },
   {
@@ -179,7 +178,8 @@
    "source": [
     "# Noise vector\n",
     "# x_T ~ N(0, 1), sample initial noise\n",
-    "noises = torch.randn(config.num_samples, 3, config.height, config.height).to(device)  \n",
+    "noises = torch.randn(config.num_samples, 3, \n",
+    "                     config.height, config.height).to(DEVICE)  \n",
     "\n",
     "# A fixed context vector to sample from\n",
     "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n",
@@ -187,7 +187,7 @@
     "                                     2,2,2,2,2,2,\n",
     "                                     3,3,3,3,3,3,\n",
     "                                     4,4,4,4,4,4]), \n",
-    "                       5).to(device=device).float()"
+    "                       5).to(DEVICE).float()"
    ]
   },
   {
@@ -205,25 +205,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "sample_ddim_context = setup_ddim(config.beta1, config.beta2, config.timesteps, device)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "aee10774-ff79-4df7-9b2d-1908561c23e5",
-   "metadata": {},
-   "source": [
-    "Let's create a `wandb.Table` to store our generations"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "table = wandb.Table(columns=[\"input_noise\", \"ddpm\", \"ddim\", \"class\"])"
+    "sample_ddim_context = setup_ddim(config.beta1, \n",
+    "                                 config.beta2, \n",
+    "                                 config.timesteps, \n",
+    "                                 DEVICE)"
    ]
   },
   {
@@ -260,27 +245,29 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ddim_samples, _ = sample_ddim_context(nn_model, noises, ctx_vector, n=config.ddim_n)"
+    "ddim_samples, _ = sample_ddim_context(nn_model, \n",
+    "                                      noises, \n",
+    "                                      ctx_vector, \n",
+    "                                      n=config.ddim_n)"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "e2eb0969",
+   "id": "5db3cb01",
    "metadata": {},
    "source": [
-    "Let's put the class names as a column on the table"
+    "### Visualizing generations on a Table\n",
+    "Let's create a `wandb.Table` to store our generations"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "af33d3c4",
+   "id": "0f1d3b94",
    "metadata": {},
    "outputs": [],
    "source": [
-    "def ctx_to_classes(ctx_vector):\n",
-    "    classes = \"hero,non-hero,food,spell,side-facing\".split(\",\")\n",
-    "    return [classes[i] for i in ctx_vector.argmax(dim=1)]"
+    "table = wandb.Table(columns=[\"input_noise\", \"ddpm\", \"ddim\", \"class\"])"
    ]
   },
   {
@@ -298,10 +285,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "for noise, ddpm_sample, ddim_sample, c in zip(noises, ddpm_samples, ddim_samples, ctx_to_classes(ctx_vector)):\n",
-    "    table.add_data(wandb.Image(noise), # we log the input noise to analyse the impact\n",
-    "                   wandb.Image(ddpm_sample), \n",
-    "                   wandb.Image(ddim_sample),\n",
+    "for noise, ddpm_s, ddim_s, c in zip(noises, \n",
+    "                                    ddpm_samples, \n",
+    "                                    ddim_samples, \n",
+    "                                    to_classes(ctx_vector)):\n",
+    "    \n",
+    "    # add data row by row to the Table\n",
+    "    table.add_data(wandb.Image(noise),\n",
+    "                   wandb.Image(ddpm_s), \n",
+    "                   wandb.Image(ddim_s),\n",
     "                   c)"
    ]
   },
@@ -320,7 +312,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "with wandb.init(project=\"dlai_sprite_diffusion\", job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n",
+    "with wandb.init(project=\"dlai_sprite_diffusion\", \n",
+    "                job_type=\"samplers_battle\", \n",
+    "                config=config):\n",
+    "    \n",
     "    wandb.log({\"samplers_table\":table})"
    ]
   }

From 47d2af00aeaaa3a526bd48a9edcdba92f5a7eaa3 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Tue, 18 Jul 2023 19:09:07 +0200
Subject: [PATCH 40/43] split prefix + prompt

---
 dlai/04_train_llm.ipynb | 371 ++++++++++++++++++++++++++++++++--------
 1 file changed, 304 insertions(+), 67 deletions(-)

diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb
index 5148cb2c..e08cdd66 100644
--- a/dlai/04_train_llm.ipynb
+++ b/dlai/04_train_llm.ipynb
@@ -3,7 +3,9 @@
   {
    "cell_type": "markdown",
    "id": "1dfae479-9399-492d-acaa-d9751615ee86",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "source": [
     "# Finetuning a language model\n",
     "Let's see how to finetune a language model to generate character backstories using HuggingFace Trainer with wandb integration. We'll use a tiny language model (`TinyStories-33M`) due to resource constraints, but the lessons you learn here should be applicable to large models too!"
@@ -11,13 +13,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "f64e095c-d28b-44aa-a122-4121c5c66a5b",
+   "execution_count": 1,
+   "id": "a1f0e67f",
    "metadata": {},
    "outputs": [],
    "source": [
-    "import os\n",
-    "\n",
     "from transformers import AutoTokenizer\n",
     "from datasets import load_dataset\n",
     "from transformers import AutoModelForCausalLM\n",
@@ -28,7 +28,35 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
+   "id": "f79c25e3-5f18-4457-84e1-ed2c0d262222",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "wandb.login(anonymous=\"allow\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
    "id": "2286ae41-213d-480d-a4ba-8c4e2e1c4771",
    "metadata": {},
    "outputs": [],
@@ -46,22 +74,64 @@
     "We'll start by loading a dataset containing Dungeons and Dragons character biographies from Huggingface. "
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "c9288a8e-b19b-4bd2-a72c-7dda03632282",
+   "metadata": {},
+   "source": [
+    "> You can expect to get some warnings here; this is OK"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "a7535b8b-d220-44e8-a56c-97e250c36596",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Found cached dataset parquet (/Users/tcapelle/.cache/huggingface/datasets/MohamedRashad___parquet/MohamedRashad--characters_backstories-6398ba4bb1a6e421/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b1d8315d3ae54248840650543b19d386",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "ds = load_dataset('MohamedRashad/characters_backstories')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "13caeb7f-8a07-4ca2-a770-5b627238c2ac",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'text': 'Generate Backstory based on following information\\nCharacter Name: Dewin \\nCharacter Race: Halfling\\nCharacter Class: Sorcerer bard\\n\\nOutput:\\n',\n",
+       " 'target': 'Dewin thought he was a wizard, but it turned out it was the draconic blood in his veins that brought him eldritch power.  Music classes in wizarding college taught him yet another use for his power, and when he was expelled he took up adventuring'}"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# Let's take a look at one example\n",
     "ds[\"train\"][400]"
@@ -69,7 +139,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "7dae9106-8015-43da-a6d9-1124dee4bdde",
    "metadata": {},
    "outputs": [],
@@ -80,13 +150,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "7ea1602d-504b-43de-87ad-fcb35b9e61f7",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map:   0%|          | 0/1857 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map:   0%|          | 0/465 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "# We'll create a tokenizer from model checkpoint\n",
-    "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=False)\n",
     "\n",
     "# We'll need padding to have same length sequences in a batch\n",
     "tokenizer.pad_token = tokenizer.eos_token\n",
@@ -104,10 +203,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "a42417b8-ffa8-4d96-92ea-d8d949d87d5e",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Generate Backstory based on following information\n",
+      "Character Name: Tommy Universe\n",
+      "Character Race: Dwarf\n",
+      "Character Class: Fighter\n",
+      "\n",
+      "Output:\n",
+      " Tommy is 4'3 and relatively slight for a dwarf, because he is still quite young.\r",
+      "He wears scale mail, carries a light shield, a warhammer and a large pack of \"Useful Items\"\r",
+      "He has an eagerness for adventure and an undeserved confidence, since he sincerely believes he is the son of the god Moradin. He wears a large holy sigil of a hammer round his neck.\r",
+      "<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>\n"
+     ]
+    }
+   ],
    "source": [
     "# Let's check out one prepared example\n",
     "print(tokenizer.decode(tokenized_datasets[\"train\"][900]['input_ids']))"
@@ -124,7 +240,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "b4f131eb-979e-40f6-9e28-19756beaa8e4",
    "metadata": {},
    "outputs": [],
@@ -135,10 +251,84 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "id": "7345ab23-8d12-4d4c-a39d-bb2202bff218",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "wandb version 0.15.5 is available!  To upgrade, please run:\n",
+       " $ pip install wandb --upgrade"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.4"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/Users/tcapelle/work/edu/dlai/wandb/run-20230718_172033-c2lx2628</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/dlai-lm-tuning/runs/c2lx2628' target=\"_blank\">zany-eon-5</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/dlai-lm-tuning' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/dlai-lm-tuning' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/dlai-lm-tuning</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/dlai-lm-tuning/runs/c2lx2628' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/dlai-lm-tuning/runs/c2lx2628</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "# Start a new wandb run\n",
     "run = wandb.init(project='dlai-lm-tuning', job_type=\"training\", anonymous=\"allow\")"
@@ -146,7 +336,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "id": "d74ee155-3c30-4ef2-9c4d-fd8ee222c50c",
    "metadata": {},
    "outputs": [],
@@ -161,12 +351,13 @@
     "    evaluation_strategy = \"epoch\",\n",
     "    learning_rate=1e-4,\n",
     "    weight_decay=0.01,\n",
+    "    no_cuda=True, # force cpu use, will be renamed `use_cpu`\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "id": "af62105f-a478-436f-88a2-5c1d78b9d20a",
    "metadata": {},
    "outputs": [],
@@ -182,89 +373,135 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "01958a56-c22a-4a27-bc71-41c59fc97f05",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Let's train!\n",
-    "trainer.train()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "0a247e01-35d0-454f-8b7e-5f24cdf66f33",
+   "execution_count": 13,
+   "id": "816f4c88-bcf2-474a-afbc-b646f89df86c",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "device(type='cpu')"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "### Generate\n",
-    "Let's use our trained model to generate some texts with our provided prompts and save them in W&B Table. The model is tiny, replace it with a bigger one to get better results!"
+    "trainer.accelerator.device"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "6f16d43d-445f-4df5-8734-85584f95792f",
-   "metadata": {},
-   "outputs": [],
+   "execution_count": 14,
+   "id": "01958a56-c22a-4a27-bc71-41c59fc97f05",
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='233' max='233' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [233/233 02:49, Epoch 1/1]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>1</td>\n",
+       "      <td>5.321600</td>\n",
+       "      <td>3.384721</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "TrainOutput(global_step=233, training_loss=3.7527249718940308, metrics={'train_runtime': 170.973, 'train_samples_per_second': 10.861, 'train_steps_per_second': 1.363, 'total_flos': 40423258718208.0, 'train_loss': 3.7527249718940308, 'epoch': 1.0})"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "model = trainer.model\n",
-    "device = next(model.parameters()).device"
+    "# Let's train!\n",
+    "trainer.train()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "id": "7911e43f-f4ce-4855-9f68-662438af8d24",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
+      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n",
+      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
+      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n",
+      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
+      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
+     ]
+    }
+   ],
    "source": [
+    "prefix = \"Generate Backstory based on following information Character Name: \"\n",
+    "\n",
     "prompts = [\n",
-    "    \"Generate Backstory based on following information Character Name: Frogger Character Race: Aarakocra Character Class: Ranger Output: \",\n",
-    "    \"Generate Backstory based on following information Character Name: Smarty Character Race: Aasimar Character Class: Cleric Output: \",\n",
-    "    \"Generate Backstory based on following information Character Name: Volcano Character Race: Android Character Class: Paladin Output: \",\n",
+    "    \"Frogger Character Race: Aarakocra Character Class: Ranger Output: \",\n",
+    "    \"Smarty Character Race: Aasimar Character Class: Cleric Output: \",\n",
+    "    \"Volcano Character Race: Android Character Class: Paladin Output: \",\n",
     "]\n",
     "\n",
     "table = wandb.Table(columns=[\"prompt\", \"generation\"])\n",
     "\n",
     "for prompt in prompts:\n",
-    "    input_ids = tokenizer.encode(prompt, return_tensors=\"pt\").to(device)\n",
+    "    input_ids = tokenizer.encode(prefix + prompt, return_tensors=\"pt\")\n",
     "    output = model.generate(input_ids, do_sample=True, max_new_tokens=50, top_p=0.3)\n",
     "    output_text = tokenizer.decode(output[0], skip_special_tokens=True)\n",
-    "    table.add_data(prompt, output_text)\n",
+    "    table.add_data(prefix + prompt, output_text)\n",
     "    \n",
     "wandb.log({'tiny_generations': table})"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "id": "3083c6a3-fdb8-44ab-a028-c0a222a2fdef",
    "metadata": {},
    "outputs": [],
    "source": [
     "wandb.finish()"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "120d5e88-2460-4716-bcba-077ff4630772",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "976edc1f-418d-47a6-88e2-ca37e3b25366",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -278,7 +515,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,

From ae4c081b22cc534233cc047e3ba7128668cb5728 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Tue, 18 Jul 2023 17:30:21 +0000
Subject: [PATCH 41/43] trained nb

---
 dlai/01_diffusion_training-instructor.ipynb | 30 ++++++++++++---------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/dlai/01_diffusion_training-instructor.ipynb b/dlai/01_diffusion_training-instructor.ipynb
index 9010b2d3..7b91e8b1 100644
--- a/dlai/01_diffusion_training-instructor.ipynb
+++ b/dlai/01_diffusion_training-instructor.ipynb
@@ -131,11 +131,10 @@
    "outputs": [],
    "source": [
     "# construct model\n",
-    "nn_model = ContextUnet(\n",
-    "    in_channels=3, \n",
-    "    n_feat=config.n_feat, \n",
-    "    n_cfeat=config.n_cfeat, \n",
-    "    height=config.height).to(DEVICE)"
+    "nn_model = ContextUnet(in_channels=3, \n",
+    "                       n_feat=config.n_feat, \n",
+    "                       n_cfeat=config.n_cfeat, \n",
+    "                       height=config.height).to(DEVICE)"
    ]
   },
   {
@@ -207,13 +206,14 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "f5f4af69",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "# create a wandb run\n",
     "run = wandb.init(project=\"dlai_sprite_diffusion\", \n",
     "                 job_type=\"train\", \n",
-    "                 anonymous=\"allow\", \n",
     "                 config=config)\n",
     "\n",
     "# we pass the config back from W&B\n",
@@ -227,12 +227,12 @@
     "    pbar = tqdm(dataloader, leave=False)\n",
     "    for x, c in pbar:   # x: images  c: context\n",
     "        optim.zero_grad()\n",
-    "        x = x.to(device)\n",
-    "        c = c.to(x)   \n",
-    "        context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(device)\n",
+    "        x = x.to(DEVICE)\n",
+    "        c = c.to(DEVICE)   \n",
+    "        context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(DEVICE)\n",
     "        c = c * context_mask.unsqueeze(-1)        \n",
     "        noise = torch.randn_like(x)\n",
-    "        t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(device) \n",
+    "        t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(DEVICE) \n",
     "        x_pert = perturb_input(x, t, noise)      \n",
     "        pred_noise = nn_model(x_pert, t / config.timesteps, c=c)      \n",
     "        loss = F.mse_loss(pred_noise, noise)\n",
@@ -245,13 +245,14 @@
     "                   \"epoch\": ep})\n",
     "\n",
     "    # save model periodically\n",
-    "    if ep%4==0 or ep == int(n_epoch-1):\n",
+    "    if ep%4==0 or ep == int(config.n_epoch-1):\n",
     "        nn_model.eval()\n",
     "        ckpt_file = SAVE_DIR/f\"context_model.pth\"\n",
     "        torch.save(nn_model.state_dict(), ckpt_file)\n",
     "        \n",
     "        \n",
     "        ###########################################################\n",
+    "        ### COPY TO DEMO NB #######################################\n",
     "        \n",
     "        # save model to wandb as an Artifact\n",
     "        artifact_name = f\"{wandb.run.id}_context_model\"\n",
@@ -260,6 +261,7 @@
     "        wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n",
     "        \n",
     "        ###########################################################\n",
+    "        ### COPY TO DEMO NB #######################################\n",
     "        \n",
     "        # sample the model and log the images to W&B\n",
     "        samples, _ = sample_ddpm_context(nn_model, \n",
@@ -271,6 +273,8 @@
     "            ]})\n",
     "        \n",
     "        ###########################################################\n",
+    "        ###########################################################\n",
+    "        \n",
     "# finish W&B run\n",
     "wandb.finish()"
    ]
@@ -292,7 +296,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.11.3"
   }
  },
  "nbformat": 4,

From d844d5a9310b37b4a7ddbbedc28a7a2c43b0d645 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Tue, 18 Jul 2023 17:34:25 +0000
Subject: [PATCH 42/43] clean up

---
 dlai/01_diffusion_training-instructor.ipynb |  2 +-
 dlai/01_diffusion_training.ipynb            | 60 ++++++++++-----------
 2 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/dlai/01_diffusion_training-instructor.ipynb b/dlai/01_diffusion_training-instructor.ipynb
index 7b91e8b1..b0874865 100644
--- a/dlai/01_diffusion_training-instructor.ipynb
+++ b/dlai/01_diffusion_training-instructor.ipynb
@@ -199,7 +199,7 @@
    "source": [
     "The following training cell takes very long to run on CPU, we have already trained the model for you on a GPU equipped machine.\n",
     "\n",
-    "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua) <<"
+    "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion/runs/gwm91gsw) <<"
    ]
   },
   {
diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index f8e87ae3..4c88cd99 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "958524a2-cb56-439e-850e-032dd10478f2",
+   "id": "e57c5e2c-04f8-40b7-9b47-e5e05505cb2c",
    "metadata": {},
    "source": [
     "# Training a Diffusion Model with Weights and Biases (W&B)\n",
@@ -17,7 +17,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "700e687c",
+   "id": "d4a34666-2281-49e3-8574-93d57c72771b",
    "metadata": {
     "tags": []
    },
@@ -37,7 +37,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "8969ab86-bd9b-475d-96e2-b913b42dec14",
+   "id": "2b4dd4a3-b05e-4a7f-811e-a715573761e9",
    "metadata": {},
    "source": [
     "We encourage you to create an account to get the full user experience from W&B"
@@ -46,7 +46,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b88f9513",
+   "id": "904d68fe-7435-48a3-b8af-c4be8675311c",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -55,7 +55,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "7c0d229a",
+   "id": "02e2b5b2-82e4-4535-aa98-34ae64a808e8",
    "metadata": {},
    "source": [
     "## Setting Things Up"
@@ -64,7 +64,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3d76c167-7122-4f88-9c9f-5ded96684fa5",
+   "id": "4918eda7-6d6b-4f9f-8650-c347ed4a5d1c",
    "metadata": {
     "tags": []
    },
@@ -99,7 +99,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "9c99dea4",
+   "id": "1ed92a7b-b6a3-4c0c-a35d-154ec26ed923",
    "metadata": {},
    "source": [
     "### Setup DDPM noise scheduler and sampler (same as in the Diffusion course). \n",
@@ -110,7 +110,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6c642e1d",
+   "id": "5ba81b76-6521-4c7c-80bd-bacde0361a34",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -124,24 +124,23 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6bc9001e",
+   "id": "c83bd768-f709-410a-8062-703bde7997d8",
    "metadata": {
     "tags": []
    },
    "outputs": [],
    "source": [
     "# construct model\n",
-    "nn_model = ContextUnet(\n",
-    "    in_channels=3, \n",
-    "    n_feat=config.n_feat, \n",
-    "    n_cfeat=config.n_cfeat, \n",
-    "    height=config.height).to(DEVICE)"
+    "nn_model = ContextUnet(in_channels=3, \n",
+    "                       n_feat=config.n_feat, \n",
+    "                       n_cfeat=config.n_cfeat, \n",
+    "                       height=config.height).to(DEVICE)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "76c63b85",
+   "id": "cf98a114-f7aa-4cbd-b08c-d56ad628da21",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -156,7 +155,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "d9ed46d7",
+   "id": "bdccd6e0-850a-41ed-89e7-db629f838770",
    "metadata": {},
    "source": [
     "## Training"
@@ -164,7 +163,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c",
+   "id": "2338bec6-319c-4603-8ae6-0e1fcbdd3a4e",
    "metadata": {},
    "source": [
     "We choose a fixed context vector with 6 samples of each class to guide our diffusion"
@@ -173,7 +172,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d88afdba",
+   "id": "56bfcd32-1a9c-4d0e-8237-77da217f41ae",
    "metadata": {
     "tags": []
    },
@@ -195,25 +194,26 @@
   },
   {
    "cell_type": "markdown",
-   "id": "26765a7e-4ddc-449e-95c3-54c58a564738",
+   "id": "e854b7c7-fa0d-4413-8642-f824449d6763",
    "metadata": {},
    "source": [
     "The following training cell takes very long to run on CPU, we have already trained the model for you on a GPU equipped machine.\n",
     "\n",
-    "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua) <<"
+    "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion/runs/gwm91gsw) <<"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "f5f4af69",
-   "metadata": {},
+   "id": "2c87ca8f-2c09-487f-a8bc-7030c2b76492",
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "# create a wandb run\n",
     "run = wandb.init(project=\"dlai_sprite_diffusion\", \n",
     "                 job_type=\"train\", \n",
-    "                 anonymous=\"allow\", \n",
     "                 config=config)\n",
     "\n",
     "# we pass the config back from W&B\n",
@@ -227,12 +227,12 @@
     "    pbar = tqdm(dataloader, leave=False)\n",
     "    for x, c in pbar:   # x: images  c: context\n",
     "        optim.zero_grad()\n",
-    "        x = x.to(device)\n",
-    "        c = c.to(x)   \n",
-    "        context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(device)\n",
+    "        x = x.to(DEVICE)\n",
+    "        c = c.to(DEVICE)   \n",
+    "        context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(DEVICE)\n",
     "        c = c * context_mask.unsqueeze(-1)        \n",
     "        noise = torch.randn_like(x)\n",
-    "        t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(device) \n",
+    "        t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(DEVICE) \n",
     "        x_pert = perturb_input(x, t, noise)      \n",
     "        pred_noise = nn_model(x_pert, t / config.timesteps, c=c)      \n",
     "        loss = F.mse_loss(pred_noise, noise)\n",
@@ -245,11 +245,11 @@
     "                   \"epoch\": ep})\n",
     "\n",
     "    # save model periodically\n",
-    "    if ep%4==0 or ep == int(n_epoch-1):\n",
+    "    if ep%4==0 or ep == int(config.n_epoch-1):\n",
     "        nn_model.eval()\n",
     "        ckpt_file = SAVE_DIR/f\"context_model.pth\"\n",
     "        torch.save(nn_model.state_dict(), ckpt_file)\n",
-    "\n",
+    "        \n",
     "# finish W&B run\n",
     "wandb.finish()"
    ]
@@ -271,7 +271,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.11.3"
   }
  },
  "nbformat": 4,

From f5c51a3e0cfe62c4f6f5f0c18356b9edc7b3a947 Mon Sep 17 00:00:00 2001
From: Thomas Capelle <tcapelle@pm.me>
Date: Wed, 19 Jul 2023 15:54:17 +0200
Subject: [PATCH 43/43] add class names as comments

---
 dlai/01_diffusion_training-instructor.ipynb |  12 +-
 dlai/01_diffusion_training.ipynb            |  29 ++-
 dlai/02_diffusion_sampling.ipynb            | 236 ++++++++++++++++++--
 3 files changed, 241 insertions(+), 36 deletions(-)

diff --git a/dlai/01_diffusion_training-instructor.ipynb b/dlai/01_diffusion_training-instructor.ipynb
index b0874865..83760618 100644
--- a/dlai/01_diffusion_training-instructor.ipynb
+++ b/dlai/01_diffusion_training-instructor.ipynb
@@ -184,11 +184,11 @@
     "                     config.height, config.height).to(DEVICE)  \n",
     "\n",
     "# A fixed context vector to sample from\n",
-    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n",
-    "                                     1,1,1,1,1,1,\n",
-    "                                     2,2,2,2,2,2,\n",
-    "                                     3,3,3,3,3,3,\n",
-    "                                     4,4,4,4,4,4]), \n",
+    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,   # hero\n",
+    "                                     1,1,1,1,1,1,   # non-hero\n",
+    "                                     2,2,2,2,2,2,   # food\n",
+    "                                     3,3,3,3,3,3,   # spell\n",
+    "                                     4,4,4,4,4,4]), # side-facing \n",
     "                       5).to(DEVICE).float()"
    ]
   },
@@ -296,7 +296,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,
diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb
index 4c88cd99..b15b820d 100644
--- a/dlai/01_diffusion_training.ipynb
+++ b/dlai/01_diffusion_training.ipynb
@@ -184,11 +184,11 @@
     "                     config.height, config.height).to(DEVICE)  \n",
     "\n",
     "# A fixed context vector to sample from\n",
-    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n",
-    "                                     1,1,1,1,1,1,\n",
-    "                                     2,2,2,2,2,2,\n",
-    "                                     3,3,3,3,3,3,\n",
-    "                                     4,4,4,4,4,4]), \n",
+    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,   # hero\n",
+    "                                     1,1,1,1,1,1,   # non-hero\n",
+    "                                     2,2,2,2,2,2,   # food\n",
+    "                                     3,3,3,3,3,3,   # spell\n",
+    "                                     4,4,4,4,4,4]), # side-facing \n",
     "                       5).to(DEVICE).float()"
    ]
   },
@@ -248,7 +248,22 @@
     "    if ep%4==0 or ep == int(config.n_epoch-1):\n",
     "        nn_model.eval()\n",
     "        ckpt_file = SAVE_DIR/f\"context_model.pth\"\n",
-    "        torch.save(nn_model.state_dict(), ckpt_file)\n",
+    "        torch.save(nn_model.state_dict(), ckpt_file)\n",
+    "                   \n",
+    "        # save model to wandb as an Artifact\n",
+    "        artifact_name = f\"{wandb.run.id}_context_model\"\n",
+    "        at = wandb.Artifact(artifact_name, type=\"model\")\n",
+    "        at.add_file(ckpt_file)\n",
+    "        wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n",
+    "                   \n",
+    "        # sample the model and log the images to W&B\n",
+    "        samples, _ = sample_ddpm_context(nn_model, \n",
+    "                                         noises, \n",
+    "                                         ctx_vector[:config.num_samples])\n",
+    "        wandb.log({\n",
+    "            \"train_samples\": [\n",
+    "                wandb.Image(img) for img in samples.split(1)\n",
+    "            ]})\n",
     "        \n",
     "# finish W&B run\n",
     "wandb.finish()"
@@ -271,7 +286,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.3"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,
diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb
index a814b291..cbc1e181 100644
--- a/dlai/02_diffusion_sampling.ipynb
+++ b/dlai/02_diffusion_sampling.ipynb
@@ -13,7 +13,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "700e687c",
    "metadata": {
     "tags": []
@@ -32,10 +32,44 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "dcaf7a29-782c-4735-991f-4408f5ec6128",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: (1) Private W&B dashboard, no account required\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: (2) Use an existing W&B account\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "wandb: Enter your choice: 1\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m: You chose 'Private W&B dashboard, no account required'\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /Users/tcapelle/.netrc\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "wandb.login(anonymous=\"allow\")"
    ]
@@ -50,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "54c3a942",
    "metadata": {
     "tags": []
@@ -88,7 +122,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "8ab66255",
    "metadata": {},
    "outputs": [],
@@ -122,10 +156,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "b47633e2",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[34m\u001b[1mwandb\u001b[0m:   1 of 1 files downloaded.  \n"
+     ]
+    }
+   ],
    "source": [
     "nn_model = load_model(MODEL_ARTIFACT)"
    ]
@@ -148,7 +190,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "146424d3",
    "metadata": {},
    "outputs": [],
@@ -169,7 +211,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "d88afdba",
    "metadata": {
     "tags": []
@@ -182,11 +224,11 @@
     "                     config.height, config.height).to(DEVICE)  \n",
     "\n",
     "# A fixed context vector to sample from\n",
-    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n",
-    "                                     1,1,1,1,1,1,\n",
-    "                                     2,2,2,2,2,2,\n",
-    "                                     3,3,3,3,3,3,\n",
-    "                                     4,4,4,4,4,4]), \n",
+    "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,   # hero\n",
+    "                                     1,1,1,1,1,1,   # non-hero\n",
+    "                                     2,2,2,2,2,2,   # food\n",
+    "                                     3,3,3,3,3,3,   # spell\n",
+    "                                     4,4,4,4,4,4]), # side-facing \n",
     "                       5).to(DEVICE).float()"
    ]
   },
@@ -200,7 +242,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "9c1a945d",
    "metadata": {},
    "outputs": [],
@@ -222,10 +264,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "89e24210-4885-4559-92e1-db10566ef5ea",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/500 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "ddpm_samples, _ = sample_ddpm_context(nn_model, noises, ctx_vector)"
    ]
@@ -240,10 +297,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "id": "25b07c26-0ac2-428a-8351-34f8b7228074",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/25 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "ddim_samples, _ = sample_ddim_context(nn_model, \n",
     "                                      noises, \n",
@@ -262,7 +334,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "id": "0f1d3b94",
    "metadata": {},
    "outputs": [],
@@ -280,7 +352,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "id": "481afea1-ae53-4b5b-a3db-1d49be0733a3",
    "metadata": {},
    "outputs": [],
@@ -307,10 +379,120 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "id": "bbc7a2ca-ae05-4462-9ae3-82eb1a6dbc27",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "wandb version 0.15.5 is available!  To upgrade, please run:\n",
+       " $ pip install wandb --upgrade"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.4"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Run data is saved locally in <code>/Users/tcapelle/work/edu/dlai/wandb/run-20230719_144552-50ekio0x</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Syncing run <strong><a href='https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion/runs/50ekio0x' target=\"_blank\">fresh-plasma-6</a></strong> to <a href='https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View project at <a href='https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run at <a href='https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion/runs/50ekio0x' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion/runs/50ekio0x</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       " View run <strong style=\"color:#cdcd00\">fresh-plasma-6</strong> at: <a href='https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion/runs/50ekio0x' target=\"_blank\">https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion/runs/50ekio0x</a><br/>Synced 6 W&B file(s), 1 media file(s), 94 artifact file(s) and 1 other file(s)"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Find logs at: <code>./wandb/run-20230719_144552-50ekio0x/logs</code>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "with wandb.init(project=\"dlai_sprite_diffusion\", \n",
     "                job_type=\"samplers_battle\", \n",
@@ -318,6 +500,14 @@
     "    \n",
     "    wandb.log({\"samplers_table\":table})"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7df56d25",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {