From f1373a38fff35b4eaaad4d1f5261f43b04e9a37b Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Wed, 5 Jul 2023 18:13:54 +0200 Subject: [PATCH 01/43] add code --- dlai/00_intro.ipynb | 577 ++++++++++++++++++++++++ dlai/01_diffusion_training.ipynb | 344 +++++++++++++++ dlai/02_diffusion_sampling.ipynb | 735 +++++++++++++++++++++++++++++++ dlai/03_llm_eval.ipynb | 364 +++++++++++++++ dlai/README.md | 13 + dlai/requirements.txt | 7 + dlai/utilities.py | 376 ++++++++++++++++ 7 files changed, 2416 insertions(+) create mode 100644 dlai/00_intro.ipynb create mode 100644 dlai/01_diffusion_training.ipynb create mode 100644 dlai/02_diffusion_sampling.ipynb create mode 100644 dlai/03_llm_eval.ipynb create mode 100644 dlai/README.md create mode 100644 dlai/requirements.txt create mode 100644 dlai/utilities.py diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb new file mode 100644 index 00000000..1cc2ecb6 --- /dev/null +++ b/dlai/00_intro.ipynb @@ -0,0 +1,577 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e9ba792c-2baa-4c19-a132-2ed82a759e79", + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "from types import SimpleNamespace\n", + "\n", + "import wandb\n", + "from tqdm.auto import tqdm\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "from torch.optim import Adam\n", + "from torch.utils.data import DataLoader, Subset\n", + "\n", + "from utilities import *\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d51a9f7f", + "metadata": {}, + "outputs": [], + "source": [ + "# Constants\n", + "INPUT_SIZE = 3 * 16 * 16\n", + "OUTPUT_SIZE = 5\n", + "HIDDEN_SIZE = 256\n", + "NUM_WORKERS = 2\n", + "CLASSES = [\"hero\", \"non-hero\", \"food\", \"spell\", \"side-facing\"]\n", + "\n", + "# Device\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "device = torch.device(\"mps\" if torch.backends.mps.is_built() else \"cpu\")\n", + "\n", + "data_dir = './data/'\n", + "\n", + "def get_dataloaders(batch_size, slice_size=None, valid_pct=0.2):\n", + " dataset = CustomDataset.from_np(data_dir + \"sprites_1788_16x16.npy\", data_dir + \"sprite_labels_nc_1788_16x16.npy\", argmax=True)\n", + "\n", + " if slice_size:\n", + " dataset = dataset.subset(slice_size)\n", + "\n", + " train_ds, valid_ds = dataset.split(valid_pct)\n", + "\n", + " train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=1) \n", + " valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False, num_workers=1)\n", + "\n", + " return train_dl, valid_dl\n", + "\n", + "def get_model(dropout):\n", + " return nn.Sequential(\n", + " nn.Flatten(),\n", + " nn.Linear(INPUT_SIZE, HIDDEN_SIZE),\n", + " nn.BatchNorm1d(HIDDEN_SIZE),\n", + " nn.ReLU(),\n", + " nn.Dropout(dropout),\n", + " nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE)\n", + " ).to(device)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8700b5fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sprite shape: (89400, 16, 16, 3)\n", + "labels shape: (89400,)\n", + "sprite shape: (1000, 16, 16, 3)\n", + "labels shape: (1000,)\n" + ] + } + ], + "source": [ + "train_dl, valid_dl = get_dataloaders(128, slice_size=1000)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "8401cf96", + "metadata": {}, + "outputs": [], + "source": [ + "def validate_model(model, valid_dl, loss_func, log_images=False, batch_idx=0):\n", + " \"Compute the performance of the model on the validation dataset and log a wandb.Table\"\n", + " model.eval()\n", + " val_loss = 0.0\n", + " correct = 0\n", + "\n", + " with torch.inference_mode():\n", + " for i, (images, labels) in enumerate(valid_dl):\n", + " images, labels = images.to(device), labels.to(device)\n", + "\n", + " # Forward pass\n", + " outputs = model(images)\n", + " val_loss += loss_func(outputs, labels) * labels.size(0)\n", + "\n", + " # Compute accuracy and accumulate\n", + " _, predicted = torch.max(outputs.data, 1)\n", + " correct += (predicted == labels).sum().item()\n", + "\n", + " # Log one batch of images to the dashboard, always same batch_idx.\n", + " if i == batch_idx and log_images:\n", + " log_image_predictions_table(images, predicted, labels, outputs.softmax(dim=1))\n", + "\n", + " return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n", + "\n", + "\n", + "def log_image_predictions_table(images, predicted, labels, probs):\n", + " \"Create a wandb Table to log images, labels, and predictions\"\n", + " table = wandb.Table(columns=[\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)])\n", + " \n", + " for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n", + " table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n", + " \n", + " wandb.log({\"predictions_table\": table}, commit=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "e5492ebb-2dfa-44ce-af6c-24655e45a2ed", + "metadata": {}, + "outputs": [], + "source": [ + "def train_model(config):\n", + " \"Train a model with a given config\"\n", + " wandb.init(\n", + " project=\"deeplearningai-intro\",\n", + " config=config\n", + " )\n", + "\n", + " # Get the data\n", + " train_dl, valid_dl = get_dataloaders(config.batch_size, config.slice_size, config.valid_pct)\n", + " n_steps_per_epoch = math.ceil(len(train_dl.dataset) / config.batch_size)\n", + "\n", + " # A simple MLP model\n", + " model = get_model(config.dropout)\n", + "\n", + " # Make the loss and optimizer\n", + " loss_func = nn.CrossEntropyLoss()\n", + " optimizer = Adam(model.parameters(), lr=config.lr)\n", + "\n", + " example_ct = 0\n", + "\n", + " for epoch in tqdm(range(config.epochs), total=config.epochs):\n", + " model.train()\n", + "\n", + " for step, (images, labels) in enumerate(train_dl):\n", + " images, labels = images.to(device), labels.to(device)\n", + "\n", + " outputs = model(images)\n", + " train_loss = loss_func(outputs, labels)\n", + " optimizer.zero_grad()\n", + " train_loss.backward()\n", + " optimizer.step()\n", + "\n", + " example_ct += len(images)\n", + " metrics = {\n", + " \"train/train_loss\": train_loss,\n", + " \"train/epoch\": (step + 1 + (n_steps_per_epoch * epoch)) / n_steps_per_epoch,\n", + " \"train/example_ct\": example_ct\n", + " }\n", + "\n", + " if step + 1 < n_steps_per_epoch:\n", + " # Log train metrics to wandb \n", + " wandb.log(metrics)\n", + " \n", + " val_loss, accuracy = validate_model(model, valid_dl, loss_func, log_images=(epoch == (config.epochs - 1)))\n", + "\n", + " # Log train and validation metrics to wandb\n", + " val_metrics = {\n", + " \"val/val_loss\": val_loss,\n", + " \"val/val_accuracy\": accuracy\n", + " }\n", + " wandb.log({**metrics, **val_metrics})\n", + "\n", + " # If you had a test set, this is how you could log it as a Summary metric\n", + " wandb.run.summary['test_accuracy'] = 0.8\n", + "\n", + " wandb.finish()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "f33f739c-d7ef-4954-ae87-d5bdd6bf25ee", + "metadata": {}, + "outputs": [], + "source": [ + "config = SimpleNamespace(\n", + " epochs = 3,\n", + " batch_size = 128,\n", + " lr = 1e-3,\n", + " dropout = 0.1,\n", + " slice_size = 1000,\n", + " valid_pct = 0.2,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.15.4" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /Users/tcapelle/work/dlai/wandb/run-20230705_144549-dg3tar8b" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run morning-jazz-7 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/capecape/deeplearningai-intro" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sprite shape: (89400, 16, 16, 3)\n", + "labels shape: (89400,)\n", + "sprite shape: (1000, 16, 16, 3)\n", + "labels shape: (1000,)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c2a535bd5a0d481e9916ab5f71edbf41", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/3 [00:00(success)." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "70d7c9e0e2fb4edd969b284271d72c6f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(Label(value='0.161 MB of 0.161 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "

Run history:


train/epoch▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇▇██
train/example_ct▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇███
train/train_loss█▆▅▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val/val_accuracy▁▆█
val/val_loss█▃▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct2400
train/train_loss0.21717
val/val_accuracy0.92
val/val_loss0.32078

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run morning-jazz-7 at: https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b
Synced 7 W&B file(s), 1 media file(s), 126 artifact file(s) and 2 other file(s)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Find logs at: ./wandb/run-20230705_144549-dg3tar8b/logs" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "train_model(config)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "e9ecf01d", + "metadata": {}, + "source": [ + "Let's try with another value of dropout:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "4f40520a-66f8-4415-9e36-174dda06aca0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.15.4" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /Users/tcapelle/work/dlai/wandb/run-20230705_144416-iysb84lz" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run icy-water-6 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/capecape/deeplearningai-intro" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sprite shape: (89400, 16, 16, 3)\n", + "labels shape: (89400,)\n", + "sprite shape: (1000, 16, 16, 3)\n", + "labels shape: (1000,)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e576d9f253b6400d80f8022dafbdd326", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/3 [00:00(success)." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7bd6b3a334c94ce58afd7c25e10d8d5a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(Label(value='0.161 MB of 0.161 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "

Run history:


train/epoch▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇▇██
train/example_ct▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇███
train/train_loss█▆▅▄▄▃▃▂▂▂▂▁▂▂▁▁▁▁▁▁▁
val/val_accuracy▁██
val/val_loss█▂▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct2400
train/train_loss0.27209
val/val_accuracy0.92
val/val_loss0.32183

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run icy-water-6 at: https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz
Synced 7 W&B file(s), 1 media file(s), 126 artifact file(s) and 2 other file(s)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Find logs at: ./wandb/run-20230705_144416-iysb84lz/logs" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "config.dropout = 0.5\n", + "train_model(config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bba77c03-fcd2-43ef-9a11-8cebef617c23", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb new file mode 100644 index 00000000..46530b6b --- /dev/null +++ b/dlai/01_diffusion_training.ipynb @@ -0,0 +1,344 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "958524a2-cb56-439e-850e-032dd10478f2", + "metadata": {}, + "source": [ + "# Training a Diffusion Model with W&B\n", + "\n", + "In this notebooks we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook and add:\n", + "- Logging of the training loss\n", + "- Sampling from the model during training and logging the samples to W&B\n", + "- Saving the model checkpoints to W&B" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "700e687c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from tqdm.notebook import tqdm\n", + "import torch\n", + "import torch.nn.functional as F\n", + "from torch.utils.data import DataLoader\n", + "import numpy as np\n", + "from utilities import *\n", + "\n", + "import wandb" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "7c0d229a", + "metadata": {}, + "source": [ + "# Setting Things Up" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "54c3a942", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# hyperparameters\n", + "num_samples = 32\n", + "\n", + "# diffusion hyperparameters\n", + "timesteps = 500\n", + "beta1 = 1e-4\n", + "beta2 = 0.02\n", + "\n", + "# network hyperparameters\n", + "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n", + "# device = \"mps\"\n", + "n_feat = 64 # 64 hidden dimension feature\n", + "n_cfeat = 5 # context vector is of size 5\n", + "height = 16 # 16x16 image\n", + "data_dir = './data/'\n", + "save_dir = './data/weights/'\n", + "if not os.path.exists(save_dir):\n", + " os.mkdir(save_dir)\n", + "\n", + "# training hyperparameters\n", + "batch_size = 100\n", + "n_epoch = 32\n", + "lrate=1e-3\n", + "\n", + "# we are storing the parameters in a dictionary to be logged to wandb\n", + "config = dict(\n", + " num_samples=num_samples,\n", + " timesteps=timesteps,\n", + " beta1=beta1,\n", + " beta2=beta2,\n", + " device=device,\n", + " n_feat=n_feat,\n", + " n_cfeat=n_cfeat,\n", + " height=height,\n", + " save_dir=save_dir,\n", + " batch_size=batch_size,\n", + " n_epoch=n_epoch,\n", + " lrate=lrate,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bb43f98f", + "metadata": {}, + "source": [ + "All this is the same as the previous notebook, except for the addition of the context vector size n_cfeat. We will use this to condition the diffusion model on a context vector." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a705d0a8", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# construct DDPM noise schedule\n", + "b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1\n", + "a_t = 1 - b_t\n", + "ab_t = torch.cumsum(a_t.log(), dim=0).exp() \n", + "ab_t[0] = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6bc9001e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# construct model\n", + "nn_model = ContextUnet(in_channels=3, n_feat=n_feat, n_cfeat=n_cfeat, height=height).to(device)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "76c63b85", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sprite shape: (89400, 16, 16, 3)\n", + "labels shape: (89400, 5)\n" + ] + } + ], + "source": [ + "# load dataset and construct optimizer\n", + "dataset = CustomDataset.from_np(data_dir + \"sprites_1788_16x16.npy\", data_dir + \"sprite_labels_nc_1788_16x16.npy\")\n", + "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=1)\n", + "optim = torch.optim.Adam(nn_model.parameters(), lr=lrate, eps=1e-5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb13689d", + "metadata": {}, + "outputs": [], + "source": [ + "# helper function: perturbs an image to a specified noise level\n", + "def perturb_input(x, t, noise):\n", + " return ab_t.sqrt()[t, None, None, None] * x + (1 - ab_t[t, None, None, None]) * noise" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "fe8eb277", + "metadata": {}, + "source": [ + "## Sampling" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "45d92c52-8a11-450c-bc78-ffa221af2fa3", + "metadata": {}, + "source": [ + "We will need to instrument the sampler to have telemetry on the generated images while training!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b0f5bed", + "metadata": {}, + "outputs": [], + "source": [ + "# helper function; removes the predicted noise (but adds some noise back in to avoid collapse)\n", + "def denoise_add_noise(x, t, pred_noise, z=None):\n", + " if z is None:\n", + " z = torch.randn_like(x)\n", + " noise = b_t.sqrt()[t] * z\n", + " mean = (x - pred_noise * ((1 - a_t[t]) / (1 - ab_t[t]).sqrt())) / a_t[t].sqrt()\n", + " return mean + noise" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16085a65", + "metadata": {}, + "outputs": [], + "source": [ + "# sample with context using standard algorithm\n", + "# we make a change to the original algorithm to allow for context explicitely (the noises)\n", + "@torch.no_grad()\n", + "def sample_ddpm_context(samples, context, save_rate=20):\n", + " # array to keep track of generated steps for plotting\n", + " intermediate = [] \n", + " for i in range(timesteps, 0, -1):\n", + " # reshape time tensor\n", + " t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n", + "\n", + " # sample some random noise to inject back in. For i = 1, don't add back in noise\n", + " z = torch.randn_like(samples) if i > 1 else 0\n", + "\n", + " eps = nn_model(samples, t, c=context) # predict noise e_(x_t,t, ctx)\n", + " samples = denoise_add_noise(samples, i, eps, z)\n", + " if i % save_rate==0 or i==timesteps or i<8:\n", + " intermediate.append(samples.detach().cpu().numpy())\n", + "\n", + " intermediate = np.stack(intermediate)\n", + " return samples.clip(-1, 1), intermediate" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d9ed46d7", + "metadata": {}, + "source": [ + "# Training" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c", + "metadata": {}, + "source": [ + "we choose a fixed context vector with 6 of each class, this way we know what to expect on the workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d88afdba", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Noise vector\n", + "# x_T ~ N(0, 1), sample initial noise\n", + "noises = torch.randn(num_samples, 3, height, height).to(device) \n", + "\n", + "# A fixed context vector to sample from\n", + "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4]), 5).to(device=device).float()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f4af69", + "metadata": {}, + "outputs": [], + "source": [ + "# same code as before, added comments on the extra W&B instrumentation lines\n", + "# create a wandb run\n", + "run = wandb.init(project=\"dlai_diffusion\", job_type=\"train_conditional\", config=config)\n", + "\n", + "for ep in range(n_epoch):\n", + " # set into train mode\n", + " nn_model.train()\n", + " optim.param_groups[0]['lr'] = lrate*(1-ep/n_epoch)\n", + " \n", + " pbar = tqdm(dataloader, leave=False)\n", + " for x, c in pbar: # x: images c: context\n", + " optim.zero_grad()\n", + " x = x.to(device)\n", + " c = c.to(x) \n", + " context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(device)\n", + " c = c * context_mask.unsqueeze(-1) \n", + " noise = torch.randn_like(x)\n", + " t = torch.randint(1, timesteps + 1, (x.shape[0],)).to(device) \n", + " x_pert = perturb_input(x, t, noise) \n", + " pred_noise = nn_model(x_pert, t / timesteps, c=c) \n", + " loss = F.mse_loss(pred_noise, noise)\n", + " loss.backward() \n", + " optim.step()\n", + "\n", + " # we log the relevant metrics to the workspace\n", + " wandb.log({\"loss\": loss.item(),\n", + " \"lr\": optim.param_groups[0]['lr'],\n", + " \"epoch\": ep})\n", + "\n", + " # save model periodically\n", + " if ep%4==0 or ep == int(n_epoch-1):\n", + " nn_model.eval()\n", + " ckpt_file = save_dir + f\"context_model_{ep}.pth\"\n", + " torch.save(nn_model.state_dict(), ckpt_file)\n", + "\n", + " # save model to wandb as an Artifact\n", + " artifact_name = f\"{wandb.run.id}_context_model\"\n", + " at = wandb.Artifact(artifact_name, type=\"model\", \n", + " metadata={\"loss\":loss.item(), \"epoch\":ep})\n", + " at.add_file(ckpt_file)\n", + " wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n", + "\n", + " # sample the model and log the images to W&B\n", + " samples, _ = sample_ddpm_context(noises, ctx_vector[:num_samples])\n", + " wandb.log({\"train_samples\": [wandb.Image(img) for img in samples.split(1)]})\n", + "\n", + "# finish W&B run\n", + "wandb.finish()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb new file mode 100644 index 00000000..23cd1f26 --- /dev/null +++ b/dlai/02_diffusion_sampling.ipynb @@ -0,0 +1,735 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "958524a2-cb56-439e-850e-032dd10478f2", + "metadata": {}, + "source": [ + "# Sampling from a diffusion model\n", + "In this notebooks we will sampled from the previously trained diffusion model.\n", + "- We are going to compare the samples from DDPM and DDIM samplers\n", + "- Visualize mixing samples with conditional diffusion models" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "700e687c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn.functional as F\n", + "import numpy as np\n", + "from utilities import *\n", + "\n", + "import wandb" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "7c0d229a", + "metadata": {}, + "source": [ + "# Setting Things Up" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "54c3a942", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Wandb Params\n", + "project = \"debug_dlai\"\n", + "entity = \"capecape\"\n", + "\n", + "# ddpm sampler hyperparameters\n", + "timesteps = 500\n", + "beta1 = 1e-4\n", + "beta2 = 0.02\n", + "num_samples = 32\n", + "height = 16\n", + "ddim_n = 25\n", + "\n", + "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n", + "\n", + "device = \"mps\"\n", + "\n", + "# we are storing the parameters in a dictionary to be logged to wandb\n", + "config = dict(\n", + " timesteps=timesteps,\n", + " beta1=beta1,\n", + " beta2=beta2,\n", + " num_samples=num_samples,\n", + " height=height,\n", + " ddim_n=ddim_n,\n", + " device=device,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bb43f98f", + "metadata": {}, + "source": [ + "We will load the model from a wandb.Artifact and set up the sampling loop." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8ab66255", + "metadata": {}, + "outputs": [], + "source": [ + "def load_model(model_artifact_name):\n", + " \"Load the model from wandb artifacts\"\n", + " api = wandb.Api()\n", + " artifact = api.artifact(model_artifact_name, type=\"model\")\n", + " model_path = artifact.download()\n", + "\n", + " # recover model info from the registry\n", + " producer_run = artifact.logged_by()\n", + "\n", + " # load the weights dictionary\n", + " model_weights = torch.load(model_path + f\"/context_model_31.pth\", map_location=\"cpu\")\n", + "\n", + " # create the model\n", + " model = ContextUnet(in_channels=3, \n", + " n_feat=producer_run.config[\"n_feat\"], \n", + " n_cfeat=producer_run.config[\"n_cfeat\"], \n", + " height=producer_run.config[\"height\"])\n", + " \n", + " # load the weights into the model\n", + " model.load_state_dict(model_weights)\n", + "\n", + " # set the model to eval mode\n", + " model.eval()\n", + " return model.to(device)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b47633e2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 1 of 1 files downloaded. \n" + ] + } + ], + "source": [ + "nn_model = load_model('capecape/dlai_diffusion/w1r7jpji_context_model:v8')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "fe8eb277", + "metadata": {}, + "source": [ + "## Sampling" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "45d92c52-8a11-450c-bc78-ffa221af2fa3", + "metadata": {}, + "source": [ + "We will sample and log the generated samples to wandb." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f6f479d1", + "metadata": {}, + "outputs": [], + "source": [ + "# construct DDPM noise schedule\n", + "b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1\n", + "a_t = 1 - b_t\n", + "ab_t = torch.cumsum(a_t.log(), dim=0).exp() \n", + "ab_t[0] = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8b0f5bed", + "metadata": {}, + "outputs": [], + "source": [ + "# helper function; removes the predicted noise (but adds some noise back in to avoid collapse)\n", + "def denoise_add_noise(x, t, pred_noise, z=None):\n", + " if z is None:\n", + " z = torch.randn_like(x)\n", + " noise = b_t.sqrt()[t] * z\n", + " mean = (x - pred_noise * ((1 - a_t[t]) / (1 - ab_t[t]).sqrt())) / a_t[t].sqrt()\n", + " return mean + noise" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "16085a65", + "metadata": {}, + "outputs": [], + "source": [ + "# sample with context using standard algorithm\n", + "# we make a change to the original algorithm to allow for context and passing a noise tensor (samples)\n", + "@torch.no_grad()\n", + "def sample_ddpm_context(samples, context, save_rate=20):\n", + " # array to keep track of generated steps for plotting\n", + " intermediate = [] \n", + " for i in range(timesteps, 0, -1):\n", + " # reshape time tensor\n", + " t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n", + "\n", + " # sample some random noise to inject back in. For i = 1, don't add back in noise\n", + " z = torch.randn_like(samples) if i > 1 else 0\n", + "\n", + " eps = nn_model(samples, t, c=context) # predict noise e_(x_t,t, ctx)\n", + " samples = denoise_add_noise(samples, i, eps, z)\n", + " if i % save_rate==0 or i==timesteps or i<8:\n", + " print(f'sampling timestep {i:3d}', end='\\r')\n", + " intermediate.append(samples.detach().cpu().numpy())\n", + "\n", + " intermediate = np.stack(intermediate)\n", + " return samples.clip(-1, 1), intermediate" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c", + "metadata": {}, + "source": [ + "Let's define a set of noises and a context vector to condition on." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d88afdba", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Noise vector\n", + "# x_T ~ N(0, 1), sample initial noise\n", + "noises = torch.randn(num_samples, 3, height, height).to(device) \n", + "\n", + "# A fixed context vector to sample from\n", + "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4]), 5).to(device=device).float()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "1cbf9ef8-619a-4052-a138-a88c0f0f8b0b", + "metadata": {}, + "source": [ + "Let's bring that faster DDIM sampler from the diffusion course." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc", + "metadata": {}, + "outputs": [], + "source": [ + "# define sampling function for DDIM \n", + "# removes the noise using ddim\n", + "def denoise_ddim(x, t, t_prev, pred_noise):\n", + " ab = ab_t[t]\n", + " ab_prev = ab_t[t_prev]\n", + " \n", + " x0_pred = ab_prev.sqrt() / ab.sqrt() * (x - (1 - ab).sqrt() * pred_noise)\n", + " dir_xt = (1 - ab_prev).sqrt() * pred_noise\n", + "\n", + " return x0_pred + dir_xt" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5fdfc048-47f0-43b5-983e-da715e1ed562", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# fast sampling algorithm with context\n", + "@torch.no_grad()\n", + "def sample_ddim_context(samples, context, n=25): \n", + " # array to keep track of generated steps for plotting\n", + " intermediate = [] \n", + " step_size = timesteps // n\n", + " for i in range(timesteps, 0, -step_size):\n", + " print(f'sampling timestep {i:3d}', end='\\r')\n", + "\n", + " # reshape time tensor\n", + " t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n", + "\n", + " eps = nn_model(samples, t, c=context) # predict noise e_(x_t,t)\n", + " samples = denoise_ddim(samples, i, i - step_size, eps)\n", + " intermediate.append(samples.detach().cpu().numpy())\n", + "\n", + " intermediate = np.stack(intermediate)\n", + " return samples.clip(-1, 1), intermediate" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "aee10774-ff79-4df7-9b2d-1908561c23e5", + "metadata": {}, + "source": [ + "Let's create a `wandb.Table` to store our generations" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2", + "metadata": {}, + "outputs": [], + "source": [ + "table = wandb.Table(columns=[\"input_noise\", \"ddpm\", \"ddim\", \"class\"])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "90b838be-8fa1-4c12-9c4f-e40dfacc08e1", + "metadata": {}, + "source": [ + "let's compute ddpm samples as before" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "89e24210-4885-4559-92e1-db10566ef5ea", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampling timestep 1\r" + ] + } + ], + "source": [ + "ddpm_samples, _ = sample_ddpm_context(noises, ctx_vector)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "836584a1-26b5-45b1-98c9-0c45d639c8f9", + "metadata": {}, + "source": [ + "For DDIM we can control the step size by the `n` param:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "25b07c26-0ac2-428a-8351-34f8b7228074", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampling timestep 20\r" + ] + } + ], + "source": [ + "ddim_samples, _ = sample_ddim_context(noises, ctx_vector, n=ddim_n)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "af33d3c4", + "metadata": {}, + "outputs": [], + "source": [ + "def ctx_to_classes(ctx_vector):\n", + " classes = \"hero,non-hero,food,spell,side-facing\".split(\",\")\n", + " return [classes[i] for i in [ctx_vector[i].argmax().item() for i in range(ctx_vector.shape[0])]]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "daea8275-0356-452e-a9f9-2824ef53f1ea", + "metadata": {}, + "source": [ + "Let's keep track of the sampling params on a dictionary" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "85be303d-0f0b-4df4-8c87-bd1bfb6145a2", + "metadata": {}, + "source": [ + "We can add the rows to the table one by one, we also cast images to `wandb.Image` so we can render them correctly in the UI" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "481afea1-ae53-4b5b-a3db-1d49be0733a3", + "metadata": {}, + "outputs": [], + "source": [ + "for noise, ddpm_sample, ddim_sample, c in zip(noises, ddpm_samples, ddim_samples, ctx_to_classes(ctx_vector)):\n", + " table.add_data(wandb.Image(noise), \n", + " wandb.Image(ddpm_sample), \n", + " wandb.Image(ddim_sample),\n", + " c)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "987cee86-2db1-4a2a-9d14-f70c6248ecb9", + "metadata": {}, + "source": [ + "we log the table to W&B, we can also use `wandb.init` as a context manager, this way we ensure that the run is finished when exiting the manager" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "bbc7a2ca-ae05-4462-9ae3-82eb1a6dbc27", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + ] + }, + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.15.4" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /Users/tcapelle/work/dlai/wandb_diffusion/wandb/run-20230704_142746-xfz2uh0q" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run winter-feather-1 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/capecape/debug_dlai" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Waiting for W&B process to finish... (success)." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run winter-feather-1 at: https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q
Synced 6 W&B file(s), 1 media file(s), 97 artifact file(s) and 1 other file(s)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Find logs at: ./wandb/run-20230704_142746-xfz2uh0q/logs" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with wandb.init(project=project, entity=entity, job_type=\"samplers_battle\", config=config):\n", + " wandb.log({\"samplers_tables\":table})" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a974258a-55fb-43ef-9136-985ec85bc3fc", + "metadata": {}, + "source": [ + "## Mixing classes during sampling" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "68987e12-22d1-4c40-b0d2-b33f6397c34a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sampling timestep 1\r" + ] + } + ], + "source": [ + "ctx = torch.tensor([\n", + " # hero, non-hero, food, spell, side-facing\n", + " [1,0,0,0,0], #human\n", + " [1,0,0.6,0,0], \n", + " [0,0,0.6,0.4,0], \n", + " [1,0,0,0,1], \n", + " [1,1,0,0,0],\n", + " [1,0,0,1,0]\n", + "]).float().to(device)\n", + "\n", + "# let's pass the same noise everytime\n", + "samples = torch.cat([torch.randn(1, 3, height, height)]*6, axis=0).to(device) \n", + "ddpm_samples, _ = sample_ddpm_context(samples, ctx)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "626ef616-dae4-4417-9219-d67ef0794e63", + "metadata": {}, + "outputs": [], + "source": [ + "hero_table = wandb.Table(columns=[\"generation\", \"hero\", \"non-hero\", \"food\", \"spell\", \"side-facing\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "dbeb2d18-feb9-4452-b368-3f7f03b1715d", + "metadata": {}, + "outputs": [], + "source": [ + "for s, c in zip(ddpm_samples, ctx.cpu().numpy().tolist()):\n", + " hero_table.add_data(wandb.Image(s), *c)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "8e196f2b-4d13-4cc2-a380-2f23530bee1c", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.15.4" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /Users/tcapelle/work/dlai/wandb_diffusion/wandb/run-20230704_142806-sv5fvps1" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run summer-spaceship-2 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/capecape/debug_dlai" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Waiting for W&B process to finish... (success)." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run summer-spaceship-2 at: https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1
Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Find logs at: ./wandb/run-20230704_142806-sv5fvps1/logs" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with wandb.init(project=project, entity=entity, job_type=\"sampling_mix\", config=config):\n", + " wandb.log({\"hero_table\":hero_table})" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dlai/03_llm_eval.ipynb b/dlai/03_llm_eval.ipynb new file mode 100644 index 00000000..0a030e22 --- /dev/null +++ b/dlai/03_llm_eval.ipynb @@ -0,0 +1,364 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "53c0d4d6-3d2b-45e5-90fa-ba7953496ec2", + "metadata": {}, + "source": [ + "# LLM Tracing with W&B\n", + "\n", + "## 1. Auto-logging\n", + "\n", + "In this section, we will call OpenAI LLM to generate names of our game assets. We will use W&B autologging, also available for other popular LLMs and libraries like ... " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98d52240-af93-4c87-a11e-309b23bdae9c", + "metadata": {}, + "outputs": [], + "source": [ + "# Install wandb-addons, this will be added to wandb soon\n", + "# !git clone https://github.com/soumik12345/wandb-addons.git\n", + "# !pip install ./wandb-addons[prompts] openai wandb -qqq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6512739b-fe35-4901-acb3-05df46b5ed9c", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import random\n", + "import time\n", + "import datetime\n", + "\n", + "import openai\n", + "import tiktoken\n", + "\n", + "from pathlib import Path\n", + "from pprint import pprint\n", + "from getpass import getpass\n", + "\n", + "from rich.markdown import Markdown\n", + "import pandas as pd\n", + "from tenacity import (\n", + " retry,\n", + " stop_after_attempt,\n", + " wait_random_exponential, # for exponential backoff\n", + ") \n", + "import wandb\n", + "from wandb.integration.openai import autolog\n", + "from wandb_addons.prompts import Trace" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b", + "metadata": {}, + "outputs": [], + "source": [ + "autolog({\"project\":\"deeplearningai-llm\", \"job_type\": \"generation\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2ab394b-295b-4cfa-aade-aa274003a56a", + "metadata": {}, + "outputs": [], + "source": [ + "@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))\n", + "def completion_with_backoff(**kwargs):\n", + " return openai.ChatCompletion.create(**kwargs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "076e62a1-188f-47e1-bda9-5e3619e7d4ba", + "metadata": {}, + "outputs": [], + "source": [ + "MODEL_NAME = \"gpt-3.5-turbo\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "736fe64f-5cca-4316-8842-588b948193de", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_and_print(system_prompt, user_prompt, n=5):\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt},\n", + " ]\n", + " responses = completion_with_backoff(\n", + " model=MODEL_NAME,\n", + " messages=messages,\n", + " n = n,\n", + " )\n", + " for response in responses.choices:\n", + " generation = response.message.content\n", + " display(Markdown(generation))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "197a256e-834f-42ee-8680-0e5cc53903cb", + "metadata": {}, + "outputs": [], + "source": [ + "openai.api_key = os.environ[\"OPENAI_API_KEY\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "690e6e0a-193b-41c8-86c4-526f8061dd94", + "metadata": {}, + "outputs": [], + "source": [ + "system_prompt = \"\"\"You are a creative copywriter.\n", + "You're given a category of game asset, and your goal is to design a name of that asset.\n", + "The game is set in a fantasy world where everyone laughs and respects each other, while celebrating diversity.\"\"\"\n", + "user_prompt = \"hero\"\n", + "generate_and_print(system_prompt, user_prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8343121b-2d47-47d1-b343-ec2393b8f02f", + "metadata": {}, + "outputs": [], + "source": [ + "user_prompt = \"jewel\"\n", + "generate_and_print(system_prompt, user_prompt)" + ] + }, + { + "cell_type": "markdown", + "id": "16d6d513-389d-4c67-a942-a922bce6ff1a", + "metadata": {}, + "source": [ + "## 2. Using Tracer to log more complex chains\n", + "\n", + "How can we get more creative outputs? Let's design an LLM chain that will first randomly pick a fantasy world, and then generate character names. We will demonstrate how to use Tracer in such scenario. You can also use our native integration with libraries like Langchain or Llamaindex instead. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c9fd404-51fd-44cf-b41e-b81dc589a4af", + "metadata": {}, + "outputs": [], + "source": [ + "worlds = [\n", + " \"a mystic medieval island inhabited by intelligent and funny frogs\",\n", + " \"a modern castle sitting on top of a volcano in a faraway galaxy\",\n", + " \"a digital world inhabited by friendly machine learning engineers\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d366dca-db12-4532-a98d-5b29fa8a0b71", + "metadata": {}, + "outputs": [], + "source": [ + "random.choice(worlds)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0db1e20a-87a8-4386-9a8d-727db9569cd7", + "metadata": {}, + "outputs": [], + "source": [ + "# define your conifg\n", + "model_name = \"gpt-3.5-turbo\"\n", + "temperature = 0.7\n", + "system_message = \"\"\"You are a creative copywriter. \n", + "You're given a category of game asset, a fantasy world, and your goal is to design a name of that asset.\n", + "Provide the resulting name only, no additional description.\n", + "Single name, max 3 words output, remember!\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a86f95e-ed0d-4989-8c1d-5b88cdac7999", + "metadata": {}, + "outputs": [], + "source": [ + "def run_creative_chain(query):\n", + " # part 1 - a chain is started...\n", + " start_time_ms = round(datetime.datetime.now().timestamp() * 1000)\n", + "\n", + " root_span = Trace(\n", + " name=\"MyCreativeChain\",\n", + " kind=\"agent\",\n", + " start_time_ms=start_time_ms,\n", + " metadata={\"user\": \"student_1\"})\n", + "\n", + " # part 2 - The chain calls into a child chain..\n", + " chain_span = Trace(\n", + " name=\"MyChain\",\n", + " kind=\"chain\",\n", + " start_time_ms=start_time_ms)\n", + "\n", + " # add the Chain span as a child of the root\n", + " root_span.add_child(chain_span)\n", + "\n", + " # part 3 - your chain picks a fantasy world\n", + " time.sleep(3)\n", + " world = random.choice(worlds)\n", + " expanded_prompt = f'Game asset category: {query}; fantasy world description: {world}'\n", + " tool_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)\n", + "\n", + " # create a Tool span \n", + " tool_span = Trace(\n", + " name=\"WorldPicker\",\n", + " kind=\"tool\",\n", + " status_code=\"success\",\n", + " start_time_ms=start_time_ms,\n", + " end_time_ms=tool_end_time_ms,\n", + " inputs={\"input\": query},\n", + " outputs={\"result\": expanded_prompt})\n", + "\n", + " # add the TOOL span as a child of the root\n", + " chain_span.add_child(tool_span)\n", + "\n", + " # part 4 - the LLMChain calls an OpenAI LLM...\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_message},\n", + " {\"role\": \"user\", \"content\": expanded_prompt}\n", + " ]\n", + "\n", + " response = openai.ChatCompletion.create(model=model_name,\n", + " messages=messages,\n", + " temperature=temperature) \n", + "\n", + " llm_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)\n", + " response_text = response[\"choices\"][0][\"message\"][\"content\"]\n", + " token_usage = response[\"usage\"].to_dict()\n", + "\n", + " llm_span = Trace(\n", + " name=\"OpenAI\",\n", + " kind=\"llm\",\n", + " status_code=\"success\",\n", + " metadata={\"temperature\":temperature,\n", + " \"token_usage\": token_usage, \n", + " \"model_name\":model_name},\n", + " start_time_ms=tool_end_time_ms,\n", + " end_time_ms=llm_end_time_ms,\n", + " inputs={\"system_prompt\":system_message, \"query\":expanded_prompt},\n", + " outputs={\"response\": response_text},\n", + " )\n", + "\n", + " # add the LLM span as a child of the Chain span...\n", + " chain_span.add_child(llm_span)\n", + "\n", + " # update the end time of the Chain span\n", + " chain_span.add_inputs_and_outputs(\n", + " inputs={\"query\":query},\n", + " outputs={\"response\": response_text})\n", + "\n", + " # update the Chain span's end time\n", + " chain_span._span.end_time_ms = llm_end_time_ms\n", + "\n", + " # part 5 - the final results from the tool are added \n", + " root_span.add_inputs_and_outputs(inputs={\"query\": query},\n", + " outputs={\"result\": response_text})\n", + " root_span._span.end_time_ms = llm_end_time_ms\n", + "\n", + " # part 6 - log all spans to W&B by logging the root span\n", + " root_span.log(name=\"creative_trace\")\n", + " print(f\"Result: {response_text}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8500843-6d4b-4fc6-93b9-4cadf5813e4a", + "metadata": {}, + "outputs": [], + "source": [ + "openai.api_key = os.environ[\"OPENAI_API_KEY\"]\n", + "\n", + "run_creative_chain(\"hero\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "538d7bf3-4ae1-4b57-8a96-a34ea0614ec3", + "metadata": {}, + "outputs": [], + "source": [ + "run_creative_chain(\"jewel\")" + ] + }, + { + "cell_type": "markdown", + "id": "1ccc075f-32bf-4451-b7ad-ab2a49cc86b6", + "metadata": {}, + "source": [ + "## Langchain agent\n", + "\n", + "WIP: add langchain agent - adding names and evaluating if they are good. Wrap a previous function as a langchain tool. \n", + "\n", + "Demonstrate W&B Tracer autologging. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45de1fb0-3630-4673-8ac0-0dffe0a52071", + "metadata": {}, + "outputs": [], + "source": [ + "# " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82c7ab14-4335-4649-95b4-35fb8023af1d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dlai/README.md b/dlai/README.md new file mode 100644 index 00000000..a20f4a8d --- /dev/null +++ b/dlai/README.md @@ -0,0 +1,13 @@ +[![](https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-gradient.svg)](https://wandb.ai/capecape/dlai_diffusion) + +# DLAI with W&B 😎 + +We instrument various notebooks from the generative AI course with W&B to track metrics, hyperparameters, and artifacts. + +- [00_intro](00_intro.ipynb) In this notebooks we learn about using wegiths and biases! we train a simple classifier on the Sprites datasets and log the results to W&B. +- [01_diffusion_training](01_diffusion_training.ipynb) In this notebook we train a diffusion model to generate images from the Sprites dataset. We log the training metrics to W&B. We sample from the model and log the images to W&B. +- [02_diffusion_sampling](02_diffusion_sampling.ipynb) In this notebook we sample from the trained model and log the images to W&B. We compare different sampling methods and log the results. +- [03 LLM evaluation and debugging](03_llm_eval.ipynb) In this notebook we generate character names using LLMs and use W&B autologgers and Tracer to evaluate and debug our generations. +- [04 WIP]() We are planning to add a CPU-based LLM finetuning notebook with a small LLM finetuned for generating names + +The W&B dashboard: https://wandb.ai/capecape/dlai_diffusion \ No newline at end of file diff --git a/dlai/requirements.txt b/dlai/requirements.txt new file mode 100644 index 00000000..886e2b28 --- /dev/null +++ b/dlai/requirements.txt @@ -0,0 +1,7 @@ +torch>=2.0 +torchvision>=0.15 +matplotlib +pandas +numpy +wandb +tqdm \ No newline at end of file diff --git a/dlai/utilities.py b/dlai/utilities.py new file mode 100644 index 00000000..29ce5bc9 --- /dev/null +++ b/dlai/utilities.py @@ -0,0 +1,376 @@ +import os +import random + +import matplotlib.pyplot as plt +import numpy as np +import torch +import torch.nn as nn +import torchvision.transforms as transforms +from matplotlib.animation import FuncAnimation, PillowWriter +from PIL import Image +from torch.utils.data import Dataset +from torchvision.utils import make_grid, save_image + + +def _fig_bounds(x): + r = x//32 + return min(5, max(1,r)) + +def show_image(im, ax=None, figsize=None, title=None, **kwargs): + "Show a PIL or PyTorch image on `ax`." + cmap=None + # Handle pytorch axis order + if isinstance(im, torch.Tensor): + im = im.data.cpu() + if im.shape[0]<5: im=im.permute(1,2,0) + elif not isinstance(im, np.ndarray): + im=np.array(im) + # Handle 1-channel images + if im.shape[-1]==1: + cmap = "gray" + im=im[...,0] + + if figsize is None: + figsize = (_fig_bounds(im.shape[0]), _fig_bounds(im.shape[1])) + if ax is None: + _,ax = plt.subplots(figsize=figsize) + ax.imshow(im, cmap=cmap, **kwargs) + if title is not None: + ax.set_title(title) + ax.axis('off') + return ax + +class ContextUnet(nn.Module): + def __init__(self, in_channels, n_feat=256, n_cfeat=10, height=28): # cfeat - context features + super(ContextUnet, self).__init__() + + # number of input channels, number of intermediate feature maps and number of classes + self.in_channels = in_channels + self.n_feat = n_feat + self.n_cfeat = n_cfeat + self.h = height #assume h == w. must be divisible by 4, so 28,24,20,16... + + # Initialize the initial convolutional layer + self.init_conv = ResidualConvBlock(in_channels, n_feat, is_res=True) + + # Initialize the down-sampling path of the U-Net with two levels + self.down1 = UnetDown(n_feat, n_feat) # down1 #[10, 256, 8, 8] + self.down2 = UnetDown(n_feat, 2 * n_feat) # down2 #[10, 256, 4, 4] + + # original: self.to_vec = nn.Sequential(nn.AvgPool2d(7), nn.GELU()) + self.to_vec = nn.Sequential(nn.AvgPool2d((4)), nn.GELU()) + + # Embed the timestep and context labels with a one-layer fully connected neural network + self.timeembed1 = EmbedFC(1, 2*n_feat) + self.timeembed2 = EmbedFC(1, 1*n_feat) + self.contextembed1 = EmbedFC(n_cfeat, 2*n_feat) + self.contextembed2 = EmbedFC(n_cfeat, 1*n_feat) + + # Initialize the up-sampling path of the U-Net with three levels + self.up0 = nn.Sequential( + nn.ConvTranspose2d(2 * n_feat, 2 * n_feat, self.h//4, self.h//4), # up-sample + nn.GroupNorm(8, 2 * n_feat), # normalize + nn.ReLU(), + ) + self.up1 = UnetUp(4 * n_feat, n_feat) + self.up2 = UnetUp(2 * n_feat, n_feat) + + # Initialize the final convolutional layers to map to the same number of channels as the input image + self.out = nn.Sequential( + nn.Conv2d(2 * n_feat, n_feat, 3, 1, 1), # reduce number of feature maps #in_channels, out_channels, kernel_size, stride=1, padding=0 + nn.GroupNorm(8, n_feat), # normalize + nn.ReLU(), + nn.Conv2d(n_feat, self.in_channels, 3, 1, 1), # map to same number of channels as input + ) + + def forward(self, x, t, c=None): + """ + x : (batch, n_feat, h, w) : input image + t : (batch, n_cfeat) : time step + c : (batch, n_classes) : context label + """ + # x is the input image, c is the context label, t is the timestep, context_mask says which samples to block the context on + + # pass the input image through the initial convolutional layer + x = self.init_conv(x) + # pass the result through the down-sampling path + down1 = self.down1(x) #[10, 256, 8, 8] + down2 = self.down2(down1) #[10, 256, 4, 4] + + # convert the feature maps to a vector and apply an activation + hiddenvec = self.to_vec(down2) + + # mask out context if context_mask == 1 + if c is None: + c = torch.zeros(x.shape[0], self.n_cfeat).to(x) + + # embed context and timestep + cemb1 = self.contextembed1(c).view(-1, self.n_feat * 2, 1, 1) # (batch, 2*n_feat, 1,1) + temb1 = self.timeembed1(t).view(-1, self.n_feat * 2, 1, 1) + cemb2 = self.contextembed2(c).view(-1, self.n_feat, 1, 1) + temb2 = self.timeembed2(t).view(-1, self.n_feat, 1, 1) + #print(f"uunet forward: cemb1 {cemb1.shape}. temb1 {temb1.shape}, cemb2 {cemb2.shape}. temb2 {temb2.shape}") + + + up1 = self.up0(hiddenvec) + up2 = self.up1(cemb1*up1 + temb1, down2) # add and multiply embeddings + up3 = self.up2(cemb2*up2 + temb2, down1) + out = self.out(torch.cat((up3, x), 1)) + return out + +class ResidualConvBlock(nn.Module): + def __init__( + self, in_channels: int, out_channels: int, is_res: bool = False + ) -> None: + super().__init__() + + # Check if input and output channels are the same for the residual connection + self.same_channels = in_channels == out_channels + + # Flag for whether or not to use residual connection + self.is_res = is_res + + # First convolutional layer + self.conv1 = nn.Sequential( + nn.Conv2d(in_channels, out_channels, 3, 1, 1), # 3x3 kernel with stride 1 and padding 1 + nn.BatchNorm2d(out_channels), # Batch normalization + nn.GELU(), # GELU activation function + ) + + # Second convolutional layer + self.conv2 = nn.Sequential( + nn.Conv2d(out_channels, out_channels, 3, 1, 1), # 3x3 kernel with stride 1 and padding 1 + nn.BatchNorm2d(out_channels), # Batch normalization + nn.GELU(), # GELU activation function + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + + # If using residual connection + if self.is_res: + # Apply first convolutional layer + x1 = self.conv1(x) + + # Apply second convolutional layer + x2 = self.conv2(x1) + + # If input and output channels are the same, add residual connection directly + if self.same_channels: + out = x + x2 + else: + # If not, apply a 1x1 convolutional layer to match dimensions before adding residual connection + shortcut = nn.Conv2d(x.shape[1], x2.shape[1], kernel_size=1, stride=1, padding=0).to(x.device) + out = shortcut(x) + x2 + #print(f"resconv forward: x {x.shape}, x1 {x1.shape}, x2 {x2.shape}, out {out.shape}") + + # Normalize output tensor + return out / 1.414 + + # If not using residual connection, return output of second convolutional layer + else: + x1 = self.conv1(x) + x2 = self.conv2(x1) + return x2 + + # Method to get the number of output channels for this block + def get_out_channels(self): + return self.conv2[0].out_channels + + # Method to set the number of output channels for this block + def set_out_channels(self, out_channels): + self.conv1[0].out_channels = out_channels + self.conv2[0].in_channels = out_channels + self.conv2[0].out_channels = out_channels + + + +class UnetUp(nn.Module): + def __init__(self, in_channels, out_channels): + super(UnetUp, self).__init__() + + # Create a list of layers for the upsampling block + # The block consists of a ConvTranspose2d layer for upsampling, followed by two ResidualConvBlock layers + layers = [ + nn.ConvTranspose2d(in_channels, out_channels, 2, 2), + ResidualConvBlock(out_channels, out_channels), + ResidualConvBlock(out_channels, out_channels), + ] + + # Use the layers to create a sequential model + self.model = nn.Sequential(*layers) + + def forward(self, x, skip): + # Concatenate the input tensor x with the skip connection tensor along the channel dimension + x = torch.cat((x, skip), 1) + + # Pass the concatenated tensor through the sequential model and return the output + x = self.model(x) + return x + + +class UnetDown(nn.Module): + def __init__(self, in_channels, out_channels): + super(UnetDown, self).__init__() + + # Create a list of layers for the downsampling block + # Each block consists of two ResidualConvBlock layers, followed by a MaxPool2d layer for downsampling + layers = [ResidualConvBlock(in_channels, out_channels), ResidualConvBlock(out_channels, out_channels), nn.MaxPool2d(2)] + + # Use the layers to create a sequential model + self.model = nn.Sequential(*layers) + + def forward(self, x): + # Pass the input through the sequential model and return the output + return self.model(x) + +class EmbedFC(nn.Module): + def __init__(self, input_dim, emb_dim): + super(EmbedFC, self).__init__() + ''' + This class defines a generic one layer feed-forward neural network for embedding input data of + dimensionality input_dim to an embedding space of dimensionality emb_dim. + ''' + self.input_dim = input_dim + + # define the layers for the network + layers = [ + nn.Linear(input_dim, emb_dim), + nn.GELU(), + nn.Linear(emb_dim, emb_dim), + ] + + # create a PyTorch sequential model consisting of the defined layers + self.model = nn.Sequential(*layers) + + def forward(self, x): + # flatten the input tensor + x = x.view(-1, self.input_dim) + # apply the model layers to the flattened tensor + return self.model(x) + +def unorm(x): + # unity norm. results in range of [0,1] + # assume x (h,w,3) + xmax = x.max((0,1)) + xmin = x.min((0,1)) + return(x - xmin)/(xmax - xmin) + +def norm_all(store, n_t, n_s): + # runs unity norm on all timesteps of all samples + nstore = np.zeros_like(store) + for t in range(n_t): + for s in range(n_s): + nstore[t,s] = unorm(store[t,s]) + return nstore + +def norm_torch(x_all): + # runs unity norm on all timesteps of all samples + # input is (n_samples, 3,h,w), the torch image format + x = x_all.cpu().numpy() + xmax = x.max((2,3)) + xmin = x.min((2,3)) + xmax = np.expand_dims(xmax,(2,3)) + xmin = np.expand_dims(xmin,(2,3)) + nstore = (x - xmin)/(xmax - xmin) + return torch.from_numpy(nstore) + +def gen_tst_context(n_cfeat): + """ + Generate test context vectors + """ + vec = torch.tensor([ + [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1], [0,0,0,0,0], # human, non-human, food, spell, side-facing + [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1], [0,0,0,0,0], # human, non-human, food, spell, side-facing + [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1], [0,0,0,0,0], # human, non-human, food, spell, side-facing + [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1], [0,0,0,0,0], # human, non-human, food, spell, side-facing + [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1], [0,0,0,0,0], # human, non-human, food, spell, side-facing + [1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], [0,0,0,0,1], [0,0,0,0,0]] # human, non-human, food, spell, side-facing + ) + return len(vec), vec + +def plot_grid(x,n_sample,n_rows,save_dir,w): + # x:(n_sample, 3, h, w) + ncols = n_sample//n_rows + grid = make_grid(norm_torch(x), nrow=ncols) # curiously, nrow is number of columns.. or number of items in the row. + save_image(grid, save_dir + f"run_image_w{w}.png") + print('saved image at ' + save_dir + f"run_image_w{w}.png") + return grid + +def plot_sample(x_gen_store,n_sample,nrows,save_dir, fn, w, save=False): + ncols = n_sample//nrows + sx_gen_store = np.moveaxis(x_gen_store,2,4) # change to Numpy image format (h,w,channels) vs (channels,h,w) + nsx_gen_store = norm_all(sx_gen_store, sx_gen_store.shape[0], n_sample) # unity norm to put in range [0,1] for np.imshow + + # create gif of images evolving over time, based on x_gen_store + fig, axs = plt.subplots(nrows=nrows, ncols=ncols, sharex=True, sharey=True,figsize=(ncols,nrows)) + def animate_diff(i, store): + print(f'gif animating frame {i} of {store.shape[0]}', end='\r') + plots = [] + for row in range(nrows): + for col in range(ncols): + axs[row, col].clear() + axs[row, col].set_xticks([]) + axs[row, col].set_yticks([]) + plots.append(axs[row, col].imshow(store[i,(row*ncols)+col])) + return plots + ani = FuncAnimation(fig, animate_diff, fargs=[nsx_gen_store], interval=200, blit=False, repeat=True, frames=nsx_gen_store.shape[0]) + plt.close() + if save: + ani.save(save_dir + f"{fn}_w{w}.gif", dpi=100, writer=PillowWriter(fps=5)) + print('saved gif at ' + save_dir + f"{fn}_w{w}.gif") + return ani + + +default_tfms = transforms.Compose([ + transforms.ToTensor(), # from [0,255] to range [0.0,1.0] + transforms.RandomHorizontalFlip(), # randomly flip and rotate + transforms.Normalize((0.5,), (0.5,)) # range [-1,1] +]) + +class CustomDataset(Dataset): + def __init__(self, sprites, slabels, transform=default_tfms, null_context=False, argmax=False): + self.sprites = sprites + if argmax: + self.slabels = np.argmax(slabels, axis=1) + else: + self.slabels = slabels + print(f"sprite shape: {self.sprites.shape}") + print(f"labels shape: {self.slabels.shape}") + self.transform = transform + self.null_context = null_context + + @classmethod + def from_np(cls, sfilename, lfilename, transform=default_tfms, null_context=False, argmax=False): + sprites = np.load(sfilename) + slabels = np.load(lfilename) + return cls(sprites, slabels, transform, null_context, argmax) + + # Return the number of images in the dataset + def __len__(self): + return len(self.sprites) + + # Get the image and label at a given index + def __getitem__(self, idx): + # Return the image and label as a tuple + if self.transform: + image = self.transform(self.sprites[idx]) + if self.null_context: + label = torch.tensor(0).to(torch.int64) + else: + label = torch.tensor(self.slabels[idx]).to(torch.int64) + return (image, label) + + + def subset(self, slice_size=1000): + # return a subset of the dataset + indices = random.sample(range(len(self)), slice_size) + return CustomDataset(self.sprites[indices], self.slabels[indices], self.transform, self.null_context) + + def split(self, pct=0.2): + "split dataset into train and test" + train_size = int((1-pct)*len(self)) + test_size = len(self) - train_size + train_dataset, test_dataset = torch.utils.data.random_split(self, [train_size, test_size]) + return train_dataset, test_dataset + + From e0dbcbe9f6bfc7817f33ed3e32823a9c53868120 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Wed, 5 Jul 2023 18:18:08 +0200 Subject: [PATCH 02/43] data as lfs --- .gitattributes | 1 + dlai/data/sprite_labels_nc_1788_16x16.npy | 3 +++ dlai/data/sprites_1788_16x16.npy | 3 +++ 3 files changed, 7 insertions(+) create mode 100644 .gitattributes create mode 100644 dlai/data/sprite_labels_nc_1788_16x16.npy create mode 100644 dlai/data/sprites_1788_16x16.npy diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..36dce4d1 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.npy filter=lfs diff=lfs merge=lfs -text diff --git a/dlai/data/sprite_labels_nc_1788_16x16.npy b/dlai/data/sprite_labels_nc_1788_16x16.npy new file mode 100644 index 00000000..b5eec1e2 --- /dev/null +++ b/dlai/data/sprite_labels_nc_1788_16x16.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b71222bd58b57cd99b1b92d830393e86ce215e0f69602f2c82aad1522f030ed7 +size 3576128 diff --git a/dlai/data/sprites_1788_16x16.npy b/dlai/data/sprites_1788_16x16.npy new file mode 100644 index 00000000..1055e7de --- /dev/null +++ b/dlai/data/sprites_1788_16x16.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61cf3b7e3184f57f2bc2bf5e75fbcf08ba379241f58966c62a9716ef581b2916 +size 68659328 From 72f41473bfa2f9c0b7bab9439a8e0f453a17e35b Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Wed, 5 Jul 2023 18:44:00 +0200 Subject: [PATCH 03/43] add model ckpt --- .gitattributes | 1 + dlai/data/weights/context_model_trained.pth | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 dlai/data/weights/context_model_trained.pth diff --git a/.gitattributes b/.gitattributes index 36dce4d1..a93af385 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ *.npy filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text diff --git a/dlai/data/weights/context_model_trained.pth b/dlai/data/weights/context_model_trained.pth new file mode 100644 index 00000000..451319cf --- /dev/null +++ b/dlai/data/weights/context_model_trained.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51535239b6f3e953db8ff9631278c3e6b133a5a500780bda5092db620ca8f570 +size 5989463 From 4b80deb7da3a275d9ec0d452be2b28ed0838bf28 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 09:28:24 +0200 Subject: [PATCH 04/43] add wandb.login() --- dlai/01_diffusion_training.ipynb | 138 +++++++++++++++++++++++++++++-- dlai/02_diffusion_sampling.ipynb | 4 +- 2 files changed, 131 insertions(+), 11 deletions(-) diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index 46530b6b..264e6272 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -33,6 +33,16 @@ "import wandb" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "b88f9513", + "metadata": {}, + "outputs": [], + "source": [ + "# wandb.login() # uncomment if you want to login to wandb" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -61,7 +71,6 @@ "\n", "# network hyperparameters\n", "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n", - "# device = \"mps\"\n", "n_feat = 64 # 64 hidden dimension feature\n", "n_cfeat = 5 # context vector is of size 5\n", "height = 16 # 16x16 image\n", @@ -154,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "eb13689d", "metadata": {}, "outputs": [], @@ -184,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "8b0f5bed", "metadata": {}, "outputs": [], @@ -200,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "16085a65", "metadata": {}, "outputs": [], @@ -247,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "d88afdba", "metadata": { "tags": [] @@ -264,10 +273,123 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "f5f4af69", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + ] + }, + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.15.4" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /Users/tcapelle/work/edu/dlai/wandb/run-20230705_183609-09mz5kur" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run sparkling-frost-38 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/capecape/dlai_diffusion" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/capecape/dlai_diffusion/runs/09mz5kur" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b4565f2a74df4f878429f373cacde283", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/894 [00:00 23\u001b[0m optim\u001b[39m.\u001b[39;49mstep()\n\u001b[1;32m 25\u001b[0m \u001b[39m# we log the relevant metrics to the workspace\u001b[39;00m\n\u001b[1;32m 26\u001b[0m wandb\u001b[39m.\u001b[39mlog({\u001b[39m\"\u001b[39m\u001b[39mloss\u001b[39m\u001b[39m\"\u001b[39m: loss\u001b[39m.\u001b[39mitem(),\n\u001b[1;32m 27\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mlr\u001b[39m\u001b[39m\"\u001b[39m: optim\u001b[39m.\u001b[39mparam_groups[\u001b[39m0\u001b[39m][\u001b[39m'\u001b[39m\u001b[39mlr\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m 28\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mepoch\u001b[39m\u001b[39m\"\u001b[39m: ep})\n", + "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/optimizer.py:280\u001b[0m, in \u001b[0;36mOptimizer.profile_hook_step..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 277\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mfunc\u001b[39m}\u001b[39;00m\u001b[39m must return None or a tuple of (new_args, new_kwargs),\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 278\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mbut got \u001b[39m\u001b[39m{\u001b[39;00mresult\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 280\u001b[0m out \u001b[39m=\u001b[39m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 281\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_optimizer_step_code()\n\u001b[1;32m 283\u001b[0m \u001b[39m# call optimizer step post hooks\u001b[39;00m\n", + "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/optimizer.py:33\u001b[0m, in \u001b[0;36m_use_grad_for_differentiable.._use_grad\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 32\u001b[0m torch\u001b[39m.\u001b[39mset_grad_enabled(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdefaults[\u001b[39m'\u001b[39m\u001b[39mdifferentiable\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[0;32m---> 33\u001b[0m ret \u001b[39m=\u001b[39m func(\u001b[39mself\u001b[39;49m, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 34\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 35\u001b[0m torch\u001b[39m.\u001b[39mset_grad_enabled(prev_grad)\n", + "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:141\u001b[0m, in \u001b[0;36mAdam.step\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m 130\u001b[0m beta1, beta2 \u001b[39m=\u001b[39m group[\u001b[39m'\u001b[39m\u001b[39mbetas\u001b[39m\u001b[39m'\u001b[39m]\n\u001b[1;32m 132\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_init_group(\n\u001b[1;32m 133\u001b[0m group,\n\u001b[1;32m 134\u001b[0m params_with_grad,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 138\u001b[0m max_exp_avg_sqs,\n\u001b[1;32m 139\u001b[0m state_steps)\n\u001b[0;32m--> 141\u001b[0m adam(\n\u001b[1;32m 142\u001b[0m params_with_grad,\n\u001b[1;32m 143\u001b[0m grads,\n\u001b[1;32m 144\u001b[0m exp_avgs,\n\u001b[1;32m 145\u001b[0m exp_avg_sqs,\n\u001b[1;32m 146\u001b[0m max_exp_avg_sqs,\n\u001b[1;32m 147\u001b[0m state_steps,\n\u001b[1;32m 148\u001b[0m amsgrad\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mamsgrad\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 149\u001b[0m beta1\u001b[39m=\u001b[39;49mbeta1,\n\u001b[1;32m 150\u001b[0m beta2\u001b[39m=\u001b[39;49mbeta2,\n\u001b[1;32m 151\u001b[0m lr\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mlr\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 152\u001b[0m weight_decay\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mweight_decay\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 153\u001b[0m eps\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39meps\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 154\u001b[0m maximize\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mmaximize\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 155\u001b[0m foreach\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mforeach\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 156\u001b[0m capturable\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mcapturable\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 157\u001b[0m differentiable\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mdifferentiable\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 158\u001b[0m fused\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mfused\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 159\u001b[0m grad_scale\u001b[39m=\u001b[39;49m\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mgrad_scale\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m 160\u001b[0m found_inf\u001b[39m=\u001b[39;49m\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mfound_inf\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m 161\u001b[0m )\n\u001b[1;32m 163\u001b[0m \u001b[39mreturn\u001b[39;00m loss\n", + "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:281\u001b[0m, in \u001b[0;36madam\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach, capturable, differentiable, fused, grad_scale, found_inf, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 279\u001b[0m func \u001b[39m=\u001b[39m _single_tensor_adam\n\u001b[0;32m--> 281\u001b[0m func(params,\n\u001b[1;32m 282\u001b[0m grads,\n\u001b[1;32m 283\u001b[0m exp_avgs,\n\u001b[1;32m 284\u001b[0m exp_avg_sqs,\n\u001b[1;32m 285\u001b[0m max_exp_avg_sqs,\n\u001b[1;32m 286\u001b[0m state_steps,\n\u001b[1;32m 287\u001b[0m amsgrad\u001b[39m=\u001b[39;49mamsgrad,\n\u001b[1;32m 288\u001b[0m beta1\u001b[39m=\u001b[39;49mbeta1,\n\u001b[1;32m 289\u001b[0m beta2\u001b[39m=\u001b[39;49mbeta2,\n\u001b[1;32m 290\u001b[0m lr\u001b[39m=\u001b[39;49mlr,\n\u001b[1;32m 291\u001b[0m weight_decay\u001b[39m=\u001b[39;49mweight_decay,\n\u001b[1;32m 292\u001b[0m eps\u001b[39m=\u001b[39;49meps,\n\u001b[1;32m 293\u001b[0m maximize\u001b[39m=\u001b[39;49mmaximize,\n\u001b[1;32m 294\u001b[0m capturable\u001b[39m=\u001b[39;49mcapturable,\n\u001b[1;32m 295\u001b[0m differentiable\u001b[39m=\u001b[39;49mdifferentiable,\n\u001b[1;32m 296\u001b[0m grad_scale\u001b[39m=\u001b[39;49mgrad_scale,\n\u001b[1;32m 297\u001b[0m found_inf\u001b[39m=\u001b[39;49mfound_inf)\n", + "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:344\u001b[0m, in \u001b[0;36m_single_tensor_adam\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, grad_scale, found_inf, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize, capturable, differentiable)\u001b[0m\n\u001b[1;32m 341\u001b[0m param \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mview_as_real(param)\n\u001b[1;32m 343\u001b[0m \u001b[39m# Decay the first and second moment running average coefficient\u001b[39;00m\n\u001b[0;32m--> 344\u001b[0m exp_avg\u001b[39m.\u001b[39;49mmul_(beta1)\u001b[39m.\u001b[39madd_(grad, alpha\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m beta1)\n\u001b[1;32m 345\u001b[0m exp_avg_sq\u001b[39m.\u001b[39mmul_(beta2)\u001b[39m.\u001b[39maddcmul_(grad, grad\u001b[39m.\u001b[39mconj(), value\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m beta2)\n\u001b[1;32m 347\u001b[0m \u001b[39mif\u001b[39;00m capturable \u001b[39mor\u001b[39;00m differentiable:\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], "source": [ "# same code as before, added comments on the extra W&B instrumentation lines\n", "# create a wandb run\n", @@ -301,7 +423,7 @@ " # save model periodically\n", " if ep%4==0 or ep == int(n_epoch-1):\n", " nn_model.eval()\n", - " ckpt_file = save_dir + f\"context_model_{ep}.pth\"\n", + " ckpt_file = save_dir + f\"context_model.pth\"\n", " torch.save(nn_model.state_dict(), ckpt_file)\n", "\n", " # save model to wandb as an Artifact\n", diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb index 23cd1f26..0cc8f093 100644 --- a/dlai/02_diffusion_sampling.ipynb +++ b/dlai/02_diffusion_sampling.ipynb @@ -61,8 +61,6 @@ "\n", "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n", "\n", - "device = \"mps\"\n", - "\n", "# we are storing the parameters in a dictionary to be logged to wandb\n", "config = dict(\n", " timesteps=timesteps,\n", @@ -101,7 +99,7 @@ " producer_run = artifact.logged_by()\n", "\n", " # load the weights dictionary\n", - " model_weights = torch.load(model_path + f\"/context_model_31.pth\", map_location=\"cpu\")\n", + " model_weights = torch.load(model_path + f\"/context_model.pth\", map_location=\"cpu\")\n", "\n", " # create the model\n", " model = ContextUnet(in_channels=3, \n", From ba00e4ec22bc29b447af86c8526c163831a05ab0 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 11:21:15 +0000 Subject: [PATCH 05/43] llm train nb --- dlai/04_train_llm.ipynb | 575 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 575 insertions(+) create mode 100644 dlai/04_train_llm.ipynb diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb new file mode 100644 index 00000000..ae128ebc --- /dev/null +++ b/dlai/04_train_llm.ipynb @@ -0,0 +1,575 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 40, + "id": "fe0ed738-872b-4952-baf8-b3f7c92214a0", + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install transformers accelerate dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "f64e095c-d28b-44aa-a122-4121c5c66a5b", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from transformers import AutoTokenizer\n", + "from datasets import load_dataset\n", + "from transformers import AutoModelForCausalLM\n", + "from transformers import Trainer, TrainingArguments" + ] + }, + { + "cell_type": "markdown", + "id": "3fd80268-c4a1-4e1a-aed3-cd5c3ab4d48f", + "metadata": {}, + "source": [ + "Load a dataset from Huggingface" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7535b8b-d220-44e8-a56c-97e250c36596", + "metadata": {}, + "outputs": [], + "source": [ + "ds = load_dataset('wikitext', 'wikitext-2-raw-v1')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "2286ae41-213d-480d-a4ba-8c4e2e1c4771", + "metadata": {}, + "outputs": [], + "source": [ + "model_checkpoint = \"roneneldan/TinyStories-1M\" # distilgpt2" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "26dfa0b7-8d9f-44f3-9e09-bc12bcb5ae0b", + "metadata": {}, + "outputs": [], + "source": [ + "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)" + ] + }, + { + "cell_type": "markdown", + "id": "f4f7a79d-9519-4133-a8cd-0a2bc59ee97b", + "metadata": {}, + "source": [ + "We can now call the tokenizer on all our texts. This is very simple, using the map method from the Datasets library. First we define a function that call the tokenizer on our texts:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "0ea05869-8ece-4a82-b9d4-3a62a84b6a77", + "metadata": {}, + "outputs": [], + "source": [ + "def tokenize_function(examples):\n", + " return tokenizer(examples[\"text\"])" + ] + }, + { + "cell_type": "markdown", + "id": "0413ebca-019b-49dc-b042-cf3cb20bf26c", + "metadata": {}, + "source": [ + "Then we apply it to all the splits in our `datasets` object, using `batched=True` and 4 processes to speed up the preprocessing. We won't need the `text` column afterward, so we discard it." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "22368c91-ddf8-4b08-848e-f732ff155494", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map (num_proc=4): 0%| | 0/4358 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_104512-3u6izcp0" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run fallen-voice-1 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/anony-moose-57595088200203951/tiny-stories?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/anony-moose-57595088200203951/tiny-stories/runs/3u6izcp0?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Do NOT share these links with anyone. They can be used to claim your runs." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " [7002/7002 03:51, Epoch 3/3]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EpochTraining LossValidation Loss
16.4719006.374047
25.8121006.196322
35.6599006.153089

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "TrainOutput(global_step=7002, training_loss=6.346599991952852, metrics={'train_runtime': 237.6024, 'train_samples_per_second': 235.679, 'train_steps_per_second': 29.469, 'total_flos': 17136527671296.0, 'train_loss': 6.346599991952852, 'epoch': 3.0})" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainer.train()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36f0eef0-8729-4d32-a2aa-b1577847f7f5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 8235b76357cb708986b8bea0aa53325d19f331fb Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 11:23:33 +0000 Subject: [PATCH 06/43] split wandb calls --- dlai/03_llm_eval.ipynb | 502 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 469 insertions(+), 33 deletions(-) diff --git a/dlai/03_llm_eval.ipynb b/dlai/03_llm_eval.ipynb index 0a030e22..e4d6cdef 100644 --- a/dlai/03_llm_eval.ipynb +++ b/dlai/03_llm_eval.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "98d52240-af93-4c87-a11e-309b23bdae9c", "metadata": {}, "outputs": [], @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "6512739b-fe35-4901-acb3-05df46b5ed9c", "metadata": {}, "outputs": [], @@ -57,17 +57,108 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b", + "execution_count": 3, + "id": "83639bac-5860-4db1-9867-7c89f3ca25a6", "metadata": {}, "outputs": [], "source": [ - "autolog({\"project\":\"deeplearningai-llm\", \"job_type\": \"generation\"})" + "PROJECT = \"deeplearningai-llm\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, + "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33manony-moose-57595088200203951\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + ] + }, + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.15.5" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_085050-g2v28ryo" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run revived-dew-9 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Do NOT share these links with anyone. They can be used to claim your runs." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "autolog({\"project\":PROJECT, \"job_type\": \"generation\", \"anonymous\":\"allow\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "b2ab394b-295b-4cfa-aade-aa274003a56a", "metadata": {}, "outputs": [], @@ -79,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "076e62a1-188f-47e1-bda9-5e3619e7d4ba", "metadata": {}, "outputs": [], @@ -89,7 +180,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "736fe64f-5cca-4316-8842-588b948193de", "metadata": {}, "outputs": [], @@ -111,20 +202,93 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "197a256e-834f-42ee-8680-0e5cc53903cb", "metadata": {}, "outputs": [], "source": [ - "openai.api_key = os.environ[\"OPENAI_API_KEY\"]" + "# openai.api_key = os.environ[\"OPENAI_API_KEY\"] # this probably needs some setup on their platform" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "690e6e0a-193b-41c8-86c4-526f8061dd94", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Artifacts logged anonymously cannot be claimed and expire after 7 days.\n" + ] + }, + { + "data": { + "text/html": [ + "

Euphoric Guardian                                                                                                  \n",
+       "
\n" + ], + "text/plain": [ + "Euphoric Guardian \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Harmony's Chosen                                                                                                   \n",
+       "
\n" + ], + "text/plain": [ + "Harmony's Chosen \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\"Hilaria: The Laughing Guardian\"                                                                                   \n",
+       "
\n" + ], + "text/plain": [ + "\"Hilaria: The Laughing Guardian\" \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Unity's Embrace                                                                                                    \n",
+       "
\n" + ], + "text/plain": [ + "Unity's Embrace \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Harmonic Defender                                                                                                  \n",
+       "
\n" + ], + "text/plain": [ + "Harmonic Defender \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "system_prompt = \"\"\"You are a creative copywriter.\n", "You're given a category of game asset, and your goal is to design a name of that asset.\n", @@ -135,15 +299,166 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "8343121b-2d47-47d1-b343-ec2393b8f02f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Artifacts logged anonymously cannot be claimed and expire after 7 days.\n" + ] + }, + { + "data": { + "text/html": [ + "
Harmony Gems                                                                                                       \n",
+       "
\n" + ], + "text/plain": [ + "Harmony Gems \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Gleam Haven                                                                                                        \n",
+       "
\n" + ], + "text/plain": [ + "Gleam Haven \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Gleaming Harmony Gem                                                                                               \n",
+       "
\n" + ], + "text/plain": [ + "Gleaming Harmony Gem \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Gleamstone                                                                                                         \n",
+       "
\n" + ], + "text/plain": [ + "Gleamstone \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Harmony Gems                                                                                                       \n",
+       "
\n" + ], + "text/plain": [ + "Harmony Gems \n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "user_prompt = \"jewel\"\n", "generate_and_print(system_prompt, user_prompt)" ] }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3266487e-150b-4dd8-9555-94e94a66aac1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Waiting for W&B process to finish... (success)." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "084faecfc3a6412baa45b2aca421f0e1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(Label(value='0.015 MB of 0.015 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "

Run history:


usage/completion_tokens█▁
usage/elapsed_time█▁
usage/prompt_tokens▁█
usage/total_tokens█▁

Run summary:


usage/completion_tokens19
usage/elapsed_time0.74012
usage/prompt_tokens62
usage/total_tokens81

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run revived-dew-9 at: https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2
Synced 6 W&B file(s), 2 media file(s), 2 artifact file(s) and 0 other file(s)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Find logs at: ./wandb/run-20230706_085050-g2v28ryo/logs" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "wandb.finish()" + ] + }, { "cell_type": "markdown", "id": "16d6d513-389d-4c67-a942-a922bce6ff1a", @@ -156,7 +471,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "3c9fd404-51fd-44cf-b41e-b81dc589a4af", "metadata": {}, "outputs": [], @@ -170,17 +485,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "9d366dca-db12-4532-a98d-5b29fa8a0b71", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'a mystic medieval island inhabited by intelligent and funny frogs'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "random.choice(worlds)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "0db1e20a-87a8-4386-9a8d-727db9569cd7", "metadata": {}, "outputs": [], @@ -196,7 +522,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "1a86f95e-ed0d-4989-8c1d-5b88cdac7999", "metadata": {}, "outputs": [], @@ -283,32 +609,144 @@ " root_span._span.end_time_ms = llm_end_time_ms\n", "\n", " # part 6 - log all spans to W&B by logging the root span\n", - " root_span.log(name=\"creative_trace\")\n", + " root_span.log(name=\"trace\")\n", " print(f\"Result: {response_text}\")\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "f8500843-6d4b-4fc6-93b9-4cadf5813e4a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e28b19b74fd0478381e1f535cf6a2655", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670242549995842, max=1.0…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.15.5" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_085107-3awycdve" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run faithful-leaf-10 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/3awycdve?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Do NOT share these links with anyone. They can be used to claim your runs." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result: Croakvalor\n" + ] + } + ], "source": [ - "openai.api_key = os.environ[\"OPENAI_API_KEY\"]\n", + "wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")\n", "\n", "run_creative_chain(\"hero\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "538d7bf3-4ae1-4b57-8a96-a34ea0614ec3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result: Volcanium Gem\n" + ] + } + ], "source": [ "run_creative_chain(\"jewel\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "45de1fb0-3630-4673-8ac0-0dffe0a52071", + "metadata": {}, + "outputs": [], + "source": [ + "wandb.finish()" + ] + }, { "cell_type": "markdown", "id": "1ccc075f-32bf-4451-b7ad-ab2a49cc86b6", @@ -322,19 +760,17 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "45de1fb0-3630-4673-8ac0-0dffe0a52071", + "cell_type": "markdown", + "id": "d05ae137-04ef-4f7c-9cdd-67b71b92db3a", "metadata": {}, - "outputs": [], "source": [ - "# " + "## Finish the run" ] }, { "cell_type": "code", "execution_count": null, - "id": "82c7ab14-4335-4649-95b4-35fb8023af1d", + "id": "5738431a-e281-4abf-9837-44fec6811ff4", "metadata": {}, "outputs": [], "source": [] @@ -342,7 +778,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -356,7 +792,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.11.3" } }, "nbformat": 4, From 1c419f7ca3276e6e56dc5ab9c3776ba7d22f2fc1 Mon Sep 17 00:00:00 2001 From: kldarek Date: Thu, 6 Jul 2023 13:42:55 +0200 Subject: [PATCH 07/43] add llm agent --- dlai/03_llm_eval.ipynb | 696 ++++++++++++----------------------------- 1 file changed, 199 insertions(+), 497 deletions(-) diff --git a/dlai/03_llm_eval.ipynb b/dlai/03_llm_eval.ipynb index e4d6cdef..797527c1 100644 --- a/dlai/03_llm_eval.ipynb +++ b/dlai/03_llm_eval.ipynb @@ -1,20 +1,8 @@ { "cells": [ - { - "cell_type": "markdown", - "id": "53c0d4d6-3d2b-45e5-90fa-ba7953496ec2", - "metadata": {}, - "source": [ - "# LLM Tracing with W&B\n", - "\n", - "## 1. Auto-logging\n", - "\n", - "In this section, we will call OpenAI LLM to generate names of our game assets. We will use W&B autologging, also available for other popular LLMs and libraries like ... " - ] - }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "98d52240-af93-4c87-a11e-309b23bdae9c", "metadata": {}, "outputs": [], @@ -24,9 +12,21 @@ "# !pip install ./wandb-addons[prompts] openai wandb -qqq" ] }, + { + "cell_type": "markdown", + "id": "53c0d4d6-3d2b-45e5-90fa-ba7953496ec2", + "metadata": {}, + "source": [ + "# LLM Tracing with W&B\n", + "\n", + "## 1. Auto-logging\n", + "\n", + "In this section, we will call OpenAI LLM to generate names of our game assets. We will use W&B autologging, also available for other popular LLMs and libraries such as Cohere or HuggingFace Pipelines. " + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "6512739b-fe35-4901-acb3-05df46b5ed9c", "metadata": {}, "outputs": [], @@ -37,11 +37,6 @@ "import datetime\n", "\n", "import openai\n", - "import tiktoken\n", - "\n", - "from pathlib import Path\n", - "from pprint import pprint\n", - "from getpass import getpass\n", "\n", "from rich.markdown import Markdown\n", "import pandas as pd\n", @@ -57,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "83639bac-5860-4db1-9867-7c89f3ca25a6", "metadata": {}, "outputs": [], @@ -67,98 +62,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33manony-moose-57595088200203951\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" - ] - }, - { - "data": { - "text/html": [ - "Tracking run with wandb version 0.15.5" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_085050-g2v28ryo" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Syncing run revived-dew-9 to Weights & Biases (docs)
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View project at https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run at https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Do NOT share these links with anyone. They can be used to claim your runs." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "autolog({\"project\":PROJECT, \"job_type\": \"generation\", \"anonymous\":\"allow\"})" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "b2ab394b-295b-4cfa-aade-aa274003a56a", "metadata": {}, "outputs": [], @@ -170,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "076e62a1-188f-47e1-bda9-5e3619e7d4ba", "metadata": {}, "outputs": [], @@ -180,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "736fe64f-5cca-4316-8842-588b948193de", "metadata": {}, "outputs": [], @@ -202,93 +116,10 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "197a256e-834f-42ee-8680-0e5cc53903cb", - "metadata": {}, - "outputs": [], - "source": [ - "# openai.api_key = os.environ[\"OPENAI_API_KEY\"] # this probably needs some setup on their platform" - ] - }, - { - "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "690e6e0a-193b-41c8-86c4-526f8061dd94", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Artifacts logged anonymously cannot be claimed and expire after 7 days.\n" - ] - }, - { - "data": { - "text/html": [ - "
Euphoric Guardian                                                                                                  \n",
-       "
\n" - ], - "text/plain": [ - "Euphoric Guardian \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Harmony's Chosen                                                                                                   \n",
-       "
\n" - ], - "text/plain": [ - "Harmony's Chosen \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\"Hilaria: The Laughing Guardian\"                                                                                   \n",
-       "
\n" - ], - "text/plain": [ - "\"Hilaria: The Laughing Guardian\" \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Unity's Embrace                                                                                                    \n",
-       "
\n" - ], - "text/plain": [ - "Unity's Embrace \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Harmonic Defender                                                                                                  \n",
-       "
\n" - ], - "text/plain": [ - "Harmonic Defender \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "system_prompt = \"\"\"You are a creative copywriter.\n", "You're given a category of game asset, and your goal is to design a name of that asset.\n", @@ -299,83 +130,10 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "8343121b-2d47-47d1-b343-ec2393b8f02f", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Artifacts logged anonymously cannot be claimed and expire after 7 days.\n" - ] - }, - { - "data": { - "text/html": [ - "
Harmony Gems                                                                                                       \n",
-       "
\n" - ], - "text/plain": [ - "Harmony Gems \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Gleam Haven                                                                                                        \n",
-       "
\n" - ], - "text/plain": [ - "Gleam Haven \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Gleaming Harmony Gem                                                                                               \n",
-       "
\n" - ], - "text/plain": [ - "Gleaming Harmony Gem \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Gleamstone                                                                                                         \n",
-       "
\n" - ], - "text/plain": [ - "Gleamstone \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Harmony Gems                                                                                                       \n",
-       "
\n" - ], - "text/plain": [ - "Harmony Gems \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "user_prompt = \"jewel\"\n", "generate_and_print(system_prompt, user_prompt)" @@ -383,78 +141,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "3266487e-150b-4dd8-9555-94e94a66aac1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Waiting for W&B process to finish... (success)." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "084faecfc3a6412baa45b2aca421f0e1", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='0.015 MB of 0.015 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "

Run history:


usage/completion_tokens█▁
usage/elapsed_time█▁
usage/prompt_tokens▁█
usage/total_tokens█▁

Run summary:


usage/completion_tokens19
usage/elapsed_time0.74012
usage/prompt_tokens62
usage/total_tokens81

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run revived-dew-9 at: https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/g2v28ryo?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2
Synced 6 W&B file(s), 2 media file(s), 2 artifact file(s) and 0 other file(s)" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Find logs at: ./wandb/run-20230706_085050-g2v28ryo/logs" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "wandb.finish()" ] @@ -466,12 +156,12 @@ "source": [ "## 2. Using Tracer to log more complex chains\n", "\n", - "How can we get more creative outputs? Let's design an LLM chain that will first randomly pick a fantasy world, and then generate character names. We will demonstrate how to use Tracer in such scenario. You can also use our native integration with libraries like Langchain or Llamaindex instead. " + "How can we get more creative outputs? Let's design an LLM chain that will first randomly pick a fantasy world, and then generate character names. We will demonstrate how to use Tracer in such scenario." ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "3c9fd404-51fd-44cf-b41e-b81dc589a4af", "metadata": {}, "outputs": [], @@ -485,28 +175,7 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "9d366dca-db12-4532-a98d-5b29fa8a0b71", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'a mystic medieval island inhabited by intelligent and funny frogs'" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "random.choice(worlds)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "0db1e20a-87a8-4386-9a8d-727db9569cd7", "metadata": {}, "outputs": [], @@ -522,7 +191,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "1a86f95e-ed0d-4989-8c1d-5b88cdac7999", "metadata": {}, "outputs": [], @@ -533,20 +202,11 @@ "\n", " root_span = Trace(\n", " name=\"MyCreativeChain\",\n", - " kind=\"agent\",\n", + " kind=\"chain\",\n", " start_time_ms=start_time_ms,\n", " metadata={\"user\": \"student_1\"})\n", "\n", - " # part 2 - The chain calls into a child chain..\n", - " chain_span = Trace(\n", - " name=\"MyChain\",\n", - " kind=\"chain\",\n", - " start_time_ms=start_time_ms)\n", - "\n", - " # add the Chain span as a child of the root\n", - " root_span.add_child(chain_span)\n", - "\n", - " # part 3 - your chain picks a fantasy world\n", + " # part 2 - your chain picks a fantasy world\n", " time.sleep(3)\n", " world = random.choice(worlds)\n", " expanded_prompt = f'Game asset category: {query}; fantasy world description: {world}'\n", @@ -563,9 +223,9 @@ " outputs={\"result\": expanded_prompt})\n", "\n", " # add the TOOL span as a child of the root\n", - " chain_span.add_child(tool_span)\n", + " root_span.add_child(tool_span)\n", "\n", - " # part 4 - the LLMChain calls an OpenAI LLM...\n", + " # part 3 - the LLMChain calls an OpenAI LLM...\n", " messages=[\n", " {\"role\": \"system\", \"content\": system_message},\n", " {\"role\": \"user\", \"content\": expanded_prompt}\n", @@ -593,126 +253,27 @@ " )\n", "\n", " # add the LLM span as a child of the Chain span...\n", - " chain_span.add_child(llm_span)\n", + " root_span.add_child(llm_span)\n", "\n", " # update the end time of the Chain span\n", - " chain_span.add_inputs_and_outputs(\n", + " root_span.add_inputs_and_outputs(\n", " inputs={\"query\":query},\n", " outputs={\"response\": response_text})\n", "\n", " # update the Chain span's end time\n", - " chain_span._span.end_time_ms = llm_end_time_ms\n", - "\n", - " # part 5 - the final results from the tool are added \n", - " root_span.add_inputs_and_outputs(inputs={\"query\": query},\n", - " outputs={\"result\": response_text})\n", " root_span._span.end_time_ms = llm_end_time_ms\n", "\n", - " # part 6 - log all spans to W&B by logging the root span\n", - " root_span.log(name=\"trace\")\n", + " # part 4 - log all spans to W&B by logging the root span\n", + " root_span.log(name=\"creative_trace\")\n", " print(f\"Result: {response_text}\")\n" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "f8500843-6d4b-4fc6-93b9-4cadf5813e4a", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e28b19b74fd0478381e1f535cf6a2655", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670242549995842, max=1.0…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Tracking run with wandb version 0.15.5" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_085107-3awycdve" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Syncing run faithful-leaf-10 to Weights & Biases (docs)
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View project at https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run at https://wandb.ai/anony-moose-57595088200203951/deeplearningai-llm/runs/3awycdve?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Do NOT share these links with anyone. They can be used to claim your runs." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result: Croakvalor\n" - ] - } - ], + "outputs": [], "source": [ "wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")\n", "\n", @@ -721,18 +282,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "538d7bf3-4ae1-4b57-8a96-a34ea0614ec3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result: Volcanium Gem\n" - ] - } - ], + "outputs": [], "source": [ "run_creative_chain(\"jewel\")" ] @@ -754,17 +307,27 @@ "source": [ "## Langchain agent\n", "\n", - "WIP: add langchain agent - adding names and evaluating if they are good. Wrap a previous function as a langchain tool. \n", - "\n", - "Demonstrate W&B Tracer autologging. " + "In the third scenario, we'll introduce an agent that will use tools such as WorldPicker and NameValidator to come up with the ultimate name. We will also use Langchain here and demonstrate its W&B integration." ] }, { - "cell_type": "markdown", - "id": "d05ae137-04ef-4f7c-9cdd-67b71b92db3a", + "cell_type": "code", + "execution_count": null, + "id": "726e0a6a-699b-434d-8c51-7542b4f981dd", "metadata": {}, + "outputs": [], "source": [ - "## Finish the run" + "# Import things that are needed generically\n", + "from langchain.agents import AgentType, initialize_agent\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.tools import BaseTool\n", + "\n", + "from typing import Optional\n", + "\n", + "from langchain.callbacks.manager import (\n", + " AsyncCallbackManagerForToolRun,\n", + " CallbackManagerForToolRun,\n", + ")" ] }, { @@ -773,12 +336,151 @@ "id": "5738431a-e281-4abf-9837-44fec6811ff4", "metadata": {}, "outputs": [], + "source": [ + "wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac08f78b-0962-4d84-b39a-21ee5e5d606b", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"LANGCHAIN_WANDB_TRACING\"] = \"true\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "539bc081-d1e3-4376-a817-23aa1d7ab2b3", + "metadata": {}, + "outputs": [], + "source": [ + "class WorldPickerTool(BaseTool):\n", + " name = \"pick_world\"\n", + " description = \"pick a virtual game world for your character or item naming\"\n", + " worlds = [\n", + " \"a mystic medieval island inhabited by intelligent and funny frogs\",\n", + " \"a modern anthill featuring a cyber-ant queen and her cyber-ant-workers\",\n", + " \"a digital world inhabited by friendly machine learning engineers\"\n", + " ]\n", + "\n", + " def _run(\n", + " self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None\n", + " ) -> str:\n", + " \"\"\"Use the tool.\"\"\"\n", + " time.sleep(1)\n", + " return random.choice(self.worlds)\n", + "\n", + " async def _arun(\n", + " self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None\n", + " ) -> str:\n", + " \"\"\"Use the tool asynchronously.\"\"\"\n", + " raise NotImplementedError(\"custom_search does not support async\")\n", + " \n", + "class NameValidatorTool(BaseTool):\n", + " name = \"validate_name\"\n", + " description = \"validate if the name is properly generated\"\n", + "\n", + " def _run(\n", + " self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None\n", + " ) -> str:\n", + " \"\"\"Use the tool.\"\"\"\n", + " time.sleep(1)\n", + " if len(query) < 20:\n", + " return f\"This is a correct name: {query}\"\n", + " else:\n", + " return f\"This name is too long. It should be shorter than 20 characters.\"\n", + "\n", + " async def _arun(\n", + " self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None\n", + " ) -> str:\n", + " \"\"\"Use the tool asynchronously.\"\"\"\n", + " raise NotImplementedError(\"custom_search does not support async\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c515ee33-1d6f-47e7-aceb-845c363eee29", + "metadata": {}, + "outputs": [], + "source": [ + "llm = ChatOpenAI(temperature=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "989407f4-0e10-4446-90d1-992c3b4c9483", + "metadata": {}, + "outputs": [], + "source": [ + "tools = [WorldPickerTool(), NameValidatorTool()]\n", + "agent = initialize_agent(\n", + " tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d4bd42d-9c95-4e02-8679-99ca43d0aa71", + "metadata": {}, + "outputs": [], + "source": [ + "agent.run(\n", + " \"Find a virtual game world for me and imagine the name of a hero in that world\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbb5ea87-a9b9-462f-80bf-b56d681dec8c", + "metadata": {}, + "outputs": [], + "source": [ + "agent.run(\n", + " \"Find a virtual game world for me and imagine the name of a jewel in that world\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d101fcd-cd7d-4ede-ad95-412c1cd72e46", + "metadata": {}, + "outputs": [], + "source": [ + "agent.run(\n", + " \"Find a virtual game world for me and imagine the name of food in that world\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "486c688c-2ca2-4fe5-8f22-afd194b3e34d", + "metadata": {}, + "outputs": [], + "source": [ + "wandb.finish()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18b6af79-9de7-4bfd-b8ea-6b4f2b405d0a", + "metadata": {}, + "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -792,7 +494,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.8.13" } }, "nbformat": 4, From bff4b3127e598621fd32e1655ba98913fc27ce3e Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 12:28:04 +0000 Subject: [PATCH 08/43] final refactors --- dlai/01_diffusion_training.ipynb | 577 +++++++++++++++++++++++++++++-- dlai/02_diffusion_sampling.ipynb | 139 ++++---- 2 files changed, 619 insertions(+), 97 deletions(-) diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index 264e6272..9d3845e8 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "b88f9513", "metadata": {}, "outputs": [], @@ -49,12 +49,12 @@ "id": "7c0d229a", "metadata": {}, "source": [ - "# Setting Things Up" + "## Setting Things Up" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "54c3a942", "metadata": { "tags": [] @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "a705d0a8", "metadata": { "tags": [] @@ -128,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "6bc9001e", "metadata": { "tags": [] @@ -141,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "76c63b85", "metadata": {}, "outputs": [ @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "eb13689d", "metadata": {}, "outputs": [], @@ -193,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "8b0f5bed", "metadata": {}, "outputs": [], @@ -209,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "16085a65", "metadata": {}, "outputs": [], @@ -242,7 +242,7 @@ "id": "d9ed46d7", "metadata": {}, "source": [ - "# Training" + "## Training" ] }, { @@ -256,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "d88afdba", "metadata": { "tags": [] @@ -271,9 +271,19 @@ "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4]), 5).to(device=device).float()" ] }, + { + "cell_type": "markdown", + "id": "26765a7e-4ddc-449e-95c3-54c58a564738", + "metadata": {}, + "source": [ + "The following training cell takes very long to run on CPU, we have already trained the model for you on a GPU equipped machine.\n", + "\n", + "### You can visit the result of this [training here](https://wandb.ai)" + ] + }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "f5f4af69", "metadata": {}, "outputs": [ @@ -282,13 +292,13 @@ "output_type": "stream", "text": [ "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ - "Tracking run with wandb version 0.15.4" + "Tracking run with wandb version 0.15.5" ], "text/plain": [ "" @@ -300,7 +310,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /Users/tcapelle/work/edu/dlai/wandb/run-20230705_183609-09mz5kur" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_120421-lqf74fua" ], "text/plain": [ "" @@ -312,7 +322,7 @@ { "data": { "text/html": [ - "Syncing run sparkling-frost-38 to Weights & Biases (docs)
" + "Syncing run daily-frost-1 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -324,7 +334,7 @@ { "data": { "text/html": [ - " View project at https://wandb.ai/capecape/dlai_diffusion" + " View project at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion" ], "text/plain": [ "" @@ -336,7 +346,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/capecape/dlai_diffusion/runs/09mz5kur" + " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua" ], "text/plain": [ "" @@ -348,7 +358,21 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b4565f2a74df4f878429f373cacde283", + "model_id": "6aef0bb639f74bdf97d56b0c0cd1ffc5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/32 [00:00 23\u001b[0m optim\u001b[39m.\u001b[39;49mstep()\n\u001b[1;32m 25\u001b[0m \u001b[39m# we log the relevant metrics to the workspace\u001b[39;00m\n\u001b[1;32m 26\u001b[0m wandb\u001b[39m.\u001b[39mlog({\u001b[39m\"\u001b[39m\u001b[39mloss\u001b[39m\u001b[39m\"\u001b[39m: loss\u001b[39m.\u001b[39mitem(),\n\u001b[1;32m 27\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mlr\u001b[39m\u001b[39m\"\u001b[39m: optim\u001b[39m.\u001b[39mparam_groups[\u001b[39m0\u001b[39m][\u001b[39m'\u001b[39m\u001b[39mlr\u001b[39m\u001b[39m'\u001b[39m],\n\u001b[1;32m 28\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mepoch\u001b[39m\u001b[39m\"\u001b[39m: ep})\n", - "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/optimizer.py:280\u001b[0m, in \u001b[0;36mOptimizer.profile_hook_step..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 277\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mfunc\u001b[39m}\u001b[39;00m\u001b[39m must return None or a tuple of (new_args, new_kwargs),\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 278\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mbut got \u001b[39m\u001b[39m{\u001b[39;00mresult\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 280\u001b[0m out \u001b[39m=\u001b[39m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 281\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_optimizer_step_code()\n\u001b[1;32m 283\u001b[0m \u001b[39m# call optimizer step post hooks\u001b[39;00m\n", - "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/optimizer.py:33\u001b[0m, in \u001b[0;36m_use_grad_for_differentiable.._use_grad\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 32\u001b[0m torch\u001b[39m.\u001b[39mset_grad_enabled(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdefaults[\u001b[39m'\u001b[39m\u001b[39mdifferentiable\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[0;32m---> 33\u001b[0m ret \u001b[39m=\u001b[39m func(\u001b[39mself\u001b[39;49m, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 34\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 35\u001b[0m torch\u001b[39m.\u001b[39mset_grad_enabled(prev_grad)\n", - "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:141\u001b[0m, in \u001b[0;36mAdam.step\u001b[0;34m(self, closure)\u001b[0m\n\u001b[1;32m 130\u001b[0m beta1, beta2 \u001b[39m=\u001b[39m group[\u001b[39m'\u001b[39m\u001b[39mbetas\u001b[39m\u001b[39m'\u001b[39m]\n\u001b[1;32m 132\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_init_group(\n\u001b[1;32m 133\u001b[0m group,\n\u001b[1;32m 134\u001b[0m params_with_grad,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 138\u001b[0m max_exp_avg_sqs,\n\u001b[1;32m 139\u001b[0m state_steps)\n\u001b[0;32m--> 141\u001b[0m adam(\n\u001b[1;32m 142\u001b[0m params_with_grad,\n\u001b[1;32m 143\u001b[0m grads,\n\u001b[1;32m 144\u001b[0m exp_avgs,\n\u001b[1;32m 145\u001b[0m exp_avg_sqs,\n\u001b[1;32m 146\u001b[0m max_exp_avg_sqs,\n\u001b[1;32m 147\u001b[0m state_steps,\n\u001b[1;32m 148\u001b[0m amsgrad\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mamsgrad\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 149\u001b[0m beta1\u001b[39m=\u001b[39;49mbeta1,\n\u001b[1;32m 150\u001b[0m beta2\u001b[39m=\u001b[39;49mbeta2,\n\u001b[1;32m 151\u001b[0m lr\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mlr\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 152\u001b[0m weight_decay\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mweight_decay\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 153\u001b[0m eps\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39meps\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 154\u001b[0m maximize\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mmaximize\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 155\u001b[0m foreach\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mforeach\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 156\u001b[0m capturable\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mcapturable\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 157\u001b[0m differentiable\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mdifferentiable\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 158\u001b[0m fused\u001b[39m=\u001b[39;49mgroup[\u001b[39m'\u001b[39;49m\u001b[39mfused\u001b[39;49m\u001b[39m'\u001b[39;49m],\n\u001b[1;32m 159\u001b[0m grad_scale\u001b[39m=\u001b[39;49m\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mgrad_scale\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m 160\u001b[0m found_inf\u001b[39m=\u001b[39;49m\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mfound_inf\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[1;32m 161\u001b[0m )\n\u001b[1;32m 163\u001b[0m \u001b[39mreturn\u001b[39;00m loss\n", - "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:281\u001b[0m, in \u001b[0;36madam\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, foreach, capturable, differentiable, fused, grad_scale, found_inf, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 279\u001b[0m func \u001b[39m=\u001b[39m _single_tensor_adam\n\u001b[0;32m--> 281\u001b[0m func(params,\n\u001b[1;32m 282\u001b[0m grads,\n\u001b[1;32m 283\u001b[0m exp_avgs,\n\u001b[1;32m 284\u001b[0m exp_avg_sqs,\n\u001b[1;32m 285\u001b[0m max_exp_avg_sqs,\n\u001b[1;32m 286\u001b[0m state_steps,\n\u001b[1;32m 287\u001b[0m amsgrad\u001b[39m=\u001b[39;49mamsgrad,\n\u001b[1;32m 288\u001b[0m beta1\u001b[39m=\u001b[39;49mbeta1,\n\u001b[1;32m 289\u001b[0m beta2\u001b[39m=\u001b[39;49mbeta2,\n\u001b[1;32m 290\u001b[0m lr\u001b[39m=\u001b[39;49mlr,\n\u001b[1;32m 291\u001b[0m weight_decay\u001b[39m=\u001b[39;49mweight_decay,\n\u001b[1;32m 292\u001b[0m eps\u001b[39m=\u001b[39;49meps,\n\u001b[1;32m 293\u001b[0m maximize\u001b[39m=\u001b[39;49mmaximize,\n\u001b[1;32m 294\u001b[0m capturable\u001b[39m=\u001b[39;49mcapturable,\n\u001b[1;32m 295\u001b[0m differentiable\u001b[39m=\u001b[39;49mdifferentiable,\n\u001b[1;32m 296\u001b[0m grad_scale\u001b[39m=\u001b[39;49mgrad_scale,\n\u001b[1;32m 297\u001b[0m found_inf\u001b[39m=\u001b[39;49mfound_inf)\n", - "File \u001b[0;32m~/miniforge3/envs/pt2/lib/python3.10/site-packages/torch/optim/adam.py:344\u001b[0m, in \u001b[0;36m_single_tensor_adam\u001b[0;34m(params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs, state_steps, grad_scale, found_inf, amsgrad, beta1, beta2, lr, weight_decay, eps, maximize, capturable, differentiable)\u001b[0m\n\u001b[1;32m 341\u001b[0m param \u001b[39m=\u001b[39m torch\u001b[39m.\u001b[39mview_as_real(param)\n\u001b[1;32m 343\u001b[0m \u001b[39m# Decay the first and second moment running average coefficient\u001b[39;00m\n\u001b[0;32m--> 344\u001b[0m exp_avg\u001b[39m.\u001b[39;49mmul_(beta1)\u001b[39m.\u001b[39madd_(grad, alpha\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m beta1)\n\u001b[1;32m 345\u001b[0m exp_avg_sq\u001b[39m.\u001b[39mmul_(beta2)\u001b[39m.\u001b[39maddcmul_(grad, grad\u001b[39m.\u001b[39mconj(), value\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m beta2)\n\u001b[1;32m 347\u001b[0m \u001b[39mif\u001b[39;00m capturable \u001b[39mor\u001b[39;00m differentiable:\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/894 [00:00(success)." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c2bd6d2f891d4c69a4c285591b2bc1e4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(Label(value='51.649 MB of 51.655 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=0.9998…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "

Run history:


epoch▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
loss█▆▅▅▄▃▃▃▃▃▄▃▃▃▃▃▃▂▂▂▃▂▂▂▂▂▂▂▂▃▃▂▂▁▁▁▂▂▁▁
lr████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁

Run summary:


epoch31
loss0.09235
lr3e-05

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run daily-frost-1 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua
Synced 6 W&B file(s), 288 media file(s), 9 artifact file(s) and 1 other file(s)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Find logs at: ./wandb/run-20230706_120421-lqf74fua/logs" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ "# same code as before, added comments on the extra W&B instrumentation lines\n", "# create a wandb run\n", - "run = wandb.init(project=\"dlai_diffusion\", job_type=\"train_conditional\", config=config)\n", + "run = wandb.init(project=\"sprite_diffusion\", job_type=\"train\", anonymous=\"allow\", config=config)\n", "\n", - "for ep in range(n_epoch):\n", + "for ep in tqdm(range(n_epoch), leave=True, total=n_epoch):\n", " # set into train mode\n", " nn_model.train()\n", " optim.param_groups[0]['lr'] = lrate*(1-ep/n_epoch)\n", @@ -440,6 +935,14 @@ "# finish W&B run\n", "wandb.finish()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "78ddaed3-8184-4161-a1d6-5af139b336d0", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -458,7 +961,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb index 0cc8f093..f831e4bb 100644 --- a/dlai/02_diffusion_sampling.ipynb +++ b/dlai/02_diffusion_sampling.ipynb @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 28, "id": "700e687c", "metadata": { "tags": [] @@ -29,6 +29,16 @@ "import wandb" ] }, + { + "cell_type": "code", + "execution_count": 2, + "id": "beab0dad-c211-4e3c-ab80-de52788f27e2", + "metadata": {}, + "outputs": [], + "source": [ + "# wandb.login() # uncomment if you want to login to wandb" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -40,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 29, "id": "54c3a942", "metadata": { "tags": [] @@ -48,14 +58,14 @@ "outputs": [], "source": [ "# Wandb Params\n", - "project = \"debug_dlai\"\n", - "entity = \"capecape\"\n", + "PROJECT = \"sprite_diffusion\"\n", + "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:v0\" \n", "\n", "# ddpm sampler hyperparameters\n", "timesteps = 500\n", "beta1 = 1e-4\n", "beta2 = 0.02\n", - "num_samples = 32\n", + "num_samples = 30\n", "height = 16\n", "ddim_n = 25\n", "\n", @@ -84,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 30, "id": "8ab66255", "metadata": {}, "outputs": [], @@ -99,7 +109,8 @@ " producer_run = artifact.logged_by()\n", "\n", " # load the weights dictionary\n", - " model_weights = torch.load(model_path + f\"/context_model.pth\", map_location=\"cpu\")\n", + " model_weights = torch.load(model_path + f\"/context_model.pth\", \n", + " map_location=\"cpu\")\n", "\n", " # create the model\n", " model = ContextUnet(in_channels=3, \n", @@ -117,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 31, "id": "b47633e2", "metadata": {}, "outputs": [ @@ -130,7 +141,7 @@ } ], "source": [ - "nn_model = load_model('capecape/dlai_diffusion/w1r7jpji_context_model:v8')" + "nn_model = load_model(MODEL_ARTIFACT)" ] }, { @@ -153,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 32, "id": "f6f479d1", "metadata": {}, "outputs": [], @@ -167,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 33, "id": "8b0f5bed", "metadata": {}, "outputs": [], @@ -181,15 +192,23 @@ " return mean + noise" ] }, + { + "cell_type": "markdown", + "id": "895b2bbc-46cd-4be8-a672-e84ef69bc329", + "metadata": {}, + "source": [ + "sample with context using standard algorithm\n", + "we make a change to the original algorithm to allow for context \n", + "and pass a fixed noise tensor (samples)" + ] + }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 34, "id": "16085a65", "metadata": {}, "outputs": [], "source": [ - "# sample with context using standard algorithm\n", - "# we make a change to the original algorithm to allow for context and passing a noise tensor (samples)\n", "@torch.no_grad()\n", "def sample_ddpm_context(samples, context, save_rate=20):\n", " # array to keep track of generated steps for plotting\n", @@ -201,7 +220,7 @@ " # sample some random noise to inject back in. For i = 1, don't add back in noise\n", " z = torch.randn_like(samples) if i > 1 else 0\n", "\n", - " eps = nn_model(samples, t, c=context) # predict noise e_(x_t,t, ctx)\n", + " eps = nn_model(samples, t, c=context) # predict noise\n", " samples = denoise_add_noise(samples, i, eps, z)\n", " if i % save_rate==0 or i==timesteps or i<8:\n", " print(f'sampling timestep {i:3d}', end='\\r')\n", @@ -222,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 35, "id": "d88afdba", "metadata": { "tags": [] @@ -234,7 +253,12 @@ "noises = torch.randn(num_samples, 3, height, height).to(device) \n", "\n", "# A fixed context vector to sample from\n", - "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4]), 5).to(device=device).float()" + "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n", + " 1,1,1,1,1,1,\n", + " 2,2,2,2,2,2,\n", + " 3,3,3,3,3,3,\n", + " 4,4,4,4,4,4]), \n", + " 5).to(device=device).float()" ] }, { @@ -248,7 +272,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 36, "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc", "metadata": {}, "outputs": [], @@ -267,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 37, "id": "5fdfc048-47f0-43b5-983e-da715e1ed562", "metadata": { "tags": [] @@ -305,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 38, "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2", "metadata": {}, "outputs": [], @@ -324,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 39, "id": "89e24210-4885-4559-92e1-db10566ef5ea", "metadata": {}, "outputs": [ @@ -351,7 +375,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 40, "id": "25b07c26-0ac2-428a-8351-34f8b7228074", "metadata": {}, "outputs": [ @@ -369,14 +393,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 50, "id": "af33d3c4", "metadata": {}, "outputs": [], "source": [ "def ctx_to_classes(ctx_vector):\n", " classes = \"hero,non-hero,food,spell,side-facing\".split(\",\")\n", - " return [classes[i] for i in [ctx_vector[i].argmax().item() for i in range(ctx_vector.shape[0])]]" + " return [classes[i] for i in ctx_vector.argmax(dim=1)]" ] }, { @@ -399,7 +423,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 42, "id": "481afea1-ae53-4b5b-a3db-1d49be0733a3", "metadata": {}, "outputs": [], @@ -422,24 +446,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 43, "id": "bbc7a2ca-ae05-4462-9ae3-82eb1a6dbc27", - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" - ] - }, { "data": { "text/html": [ - "Tracking run with wandb version 0.15.4" + "Tracking run with wandb version 0.15.5" ], "text/plain": [ "" @@ -451,7 +465,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /Users/tcapelle/work/dlai/wandb_diffusion/wandb/run-20230704_142746-xfz2uh0q" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_122356-yw6l5eqs" ], "text/plain": [ "" @@ -463,7 +477,7 @@ { "data": { "text/html": [ - "Syncing run winter-feather-1 to Weights & Biases (docs)
" + "Syncing run earthy-firebrand-4 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -475,7 +489,7 @@ { "data": { "text/html": [ - " View project at https://wandb.ai/capecape/debug_dlai" + " View project at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion" ], "text/plain": [ "" @@ -487,7 +501,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q" + " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs" ], "text/plain": [ "" @@ -511,7 +525,7 @@ { "data": { "text/html": [ - " View run winter-feather-1 at: https://wandb.ai/capecape/debug_dlai/runs/xfz2uh0q
Synced 6 W&B file(s), 1 media file(s), 97 artifact file(s) and 1 other file(s)" + " View run earthy-firebrand-4 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs
Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 1 other file(s)" ], "text/plain": [ "" @@ -523,7 +537,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20230704_142746-xfz2uh0q/logs" + "Find logs at: ./wandb/run-20230706_122356-yw6l5eqs/logs" ], "text/plain": [ "" @@ -534,8 +548,8 @@ } ], "source": [ - "with wandb.init(project=project, entity=entity, job_type=\"samplers_battle\", config=config):\n", - " wandb.log({\"samplers_tables\":table})" + "with wandb.init(project=PROJECT, job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n", + " wandb.log({\"samplers_table\":table})" ] }, { @@ -549,7 +563,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 44, "id": "68987e12-22d1-4c40-b0d2-b33f6397c34a", "metadata": {}, "outputs": [ @@ -579,17 +593,22 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 45, "id": "626ef616-dae4-4417-9219-d67ef0794e63", "metadata": {}, "outputs": [], "source": [ - "hero_table = wandb.Table(columns=[\"generation\", \"hero\", \"non-hero\", \"food\", \"spell\", \"side-facing\"])" + "hero_table = wandb.Table(columns=[\"generation\", \n", + " \"hero\", \n", + " \"non-hero\", \n", + " \"food\", \n", + " \"spell\", \n", + " \"side-facing\"])" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 46, "id": "dbeb2d18-feb9-4452-b368-3f7f03b1715d", "metadata": {}, "outputs": [], @@ -600,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 47, "id": "8e196f2b-4d13-4cc2-a380-2f23530bee1c", "metadata": { "scrolled": true @@ -609,7 +628,7 @@ { "data": { "text/html": [ - "Tracking run with wandb version 0.15.4" + "Tracking run with wandb version 0.15.5" ], "text/plain": [ "" @@ -621,7 +640,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /Users/tcapelle/work/dlai/wandb_diffusion/wandb/run-20230704_142806-sv5fvps1" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_122412-mlom51n4" ], "text/plain": [ "" @@ -633,7 +652,7 @@ { "data": { "text/html": [ - "Syncing run summer-spaceship-2 to Weights & Biases (docs)
" + "Syncing run avid-haze-5 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -645,7 +664,7 @@ { "data": { "text/html": [ - " View project at https://wandb.ai/capecape/debug_dlai" + " View project at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion" ], "text/plain": [ "" @@ -657,7 +676,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1" + " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4" ], "text/plain": [ "" @@ -681,7 +700,7 @@ { "data": { "text/html": [ - " View run summer-spaceship-2 at: https://wandb.ai/capecape/debug_dlai/runs/sv5fvps1
Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)" + " View run avid-haze-5 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4
Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)" ], "text/plain": [ "" @@ -693,7 +712,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20230704_142806-sv5fvps1/logs" + "Find logs at: ./wandb/run-20230706_122412-mlom51n4/logs" ], "text/plain": [ "" @@ -704,7 +723,7 @@ } ], "source": [ - "with wandb.init(project=project, entity=entity, job_type=\"sampling_mix\", config=config):\n", + "with wandb.init(project=PROJECT, job_type=\"sampling_mix\", config=config):\n", " wandb.log({\"hero_table\":hero_table})" ] } @@ -725,7 +744,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.11.3" } }, "nbformat": 4, From c7f91ad8e91cb5c14ba8ccebe2ded1fdd13f6295 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 12:36:31 +0000 Subject: [PATCH 09/43] increase batch size to 10k --- dlai/00_intro.ipynb | 162 +++++++++++++++++++------------------------- 1 file changed, 69 insertions(+), 93 deletions(-) diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb index 1cc2ecb6..d858d6ce 100644 --- a/dlai/00_intro.ipynb +++ b/dlai/00_intro.ipynb @@ -8,6 +8,7 @@ "outputs": [], "source": [ "import math\n", + "from pathlib import Path\n", "from types import SimpleNamespace\n", "\n", "import wandb\n", @@ -24,11 +25,20 @@ { "cell_type": "code", "execution_count": 2, + "id": "803c37e2-7ff5-46a6-afb7-b80cb69f7501", + "metadata": {}, + "outputs": [], + "source": [ + "# wandb.login() # uncomment if you want to login to wandb" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "id": "d51a9f7f", "metadata": {}, "outputs": [], "source": [ - "# Constants\n", "INPUT_SIZE = 3 * 16 * 16\n", "OUTPUT_SIZE = 5\n", "HIDDEN_SIZE = 256\n", @@ -37,12 +47,12 @@ "\n", "# Device\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", - "device = torch.device(\"mps\" if torch.backends.mps.is_built() else \"cpu\")\n", "\n", - "data_dir = './data/'\n", + "data_dir = Path('./data/')\n", "\n", "def get_dataloaders(batch_size, slice_size=None, valid_pct=0.2):\n", - " dataset = CustomDataset.from_np(data_dir + \"sprites_1788_16x16.npy\", data_dir + \"sprite_labels_nc_1788_16x16.npy\", argmax=True)\n", + " dataset = CustomDataset.from_np(data_dir/\"sprites_1788_16x16.npy\", \n", + " data_dir/\"sprite_labels_nc_1788_16x16.npy\", argmax=True)\n", "\n", " if slice_size:\n", " dataset = dataset.subset(slice_size)\n", @@ -67,28 +77,7 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "8700b5fa", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sprite shape: (89400, 16, 16, 3)\n", - "labels shape: (89400,)\n", - "sprite shape: (1000, 16, 16, 3)\n", - "labels shape: (1000,)\n" - ] - } - ], - "source": [ - "train_dl, valid_dl = get_dataloaders(128, slice_size=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, + "execution_count": 4, "id": "8401cf96", "metadata": {}, "outputs": [], @@ -130,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 5, "id": "e5492ebb-2dfa-44ce-af6c-24655e45a2ed", "metadata": {}, "outputs": [], @@ -138,8 +127,9 @@ "def train_model(config):\n", " \"Train a model with a given config\"\n", " wandb.init(\n", - " project=\"deeplearningai-intro\",\n", - " config=config\n", + " project=\"intro\",\n", + " config=config,\n", + " anonymous=\"allow\",\n", " )\n", "\n", " # Get the data\n", @@ -195,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 6, "id": "f33f739c-d7ef-4954-ae87-d5bdd6bf25ee", "metadata": {}, "outputs": [], @@ -205,21 +195,29 @@ " batch_size = 128,\n", " lr = 1e-3,\n", " dropout = 0.1,\n", - " slice_size = 1000,\n", + " slice_size = 10_000,\n", " valid_pct = 0.2,\n", ")" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 7, "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + ] + }, { "data": { "text/html": [ - "Tracking run with wandb version 0.15.4" + "Tracking run with wandb version 0.15.5" ], "text/plain": [ "" @@ -231,7 +229,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /Users/tcapelle/work/dlai/wandb/run-20230705_144549-dg3tar8b" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_123525-7s54fntl" ], "text/plain": [ "" @@ -243,7 +241,7 @@ { "data": { "text/html": [ - "Syncing run morning-jazz-7 to Weights & Biases (docs)
" + "Syncing run clear-sound-7 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -255,7 +253,7 @@ { "data": { "text/html": [ - " View project at https://wandb.ai/capecape/deeplearningai-intro" + " View project at https://wandb.ai/deeplearning-ai-temp/intro" ], "text/plain": [ "" @@ -267,7 +265,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b" + " View run at https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl" ], "text/plain": [ "" @@ -282,14 +280,14 @@ "text": [ "sprite shape: (89400, 16, 16, 3)\n", "labels shape: (89400,)\n", - "sprite shape: (1000, 16, 16, 3)\n", - "labels shape: (1000,)\n" + "sprite shape: (10000, 16, 16, 3)\n", + "labels shape: (10000,)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c2a535bd5a0d481e9916ab5f71edbf41", + "model_id": "e77b2cb6e7094dec858054a0ef68c5d3", "version_major": 2, "version_minor": 0 }, @@ -312,20 +310,6 @@ "metadata": {}, "output_type": "display_data" }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "70d7c9e0e2fb4edd969b284271d72c6f", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='0.161 MB of 0.161 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ @@ -334,7 +318,7 @@ " .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n", " .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n", " \n", - "

Run history:


train/epoch▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇▇██
train/example_ct▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇███
train/train_loss█▆▅▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val/val_accuracy▁▆█
val/val_loss█▃▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct2400
train/train_loss0.21717
val/val_accuracy0.92
val/val_loss0.32078

" + "

Run history:


train/epoch▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/example_ct▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/train_loss█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/val_accuracy▁▅█
val/val_loss█▃▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct24000
train/train_loss0.0144
val/val_accuracy1.0
val/val_loss0.00669

" ], "text/plain": [ "" @@ -346,7 +330,7 @@ { "data": { "text/html": [ - " View run morning-jazz-7 at: https://wandb.ai/capecape/deeplearningai-intro/runs/dg3tar8b
Synced 7 W&B file(s), 1 media file(s), 126 artifact file(s) and 2 other file(s)" + " View run clear-sound-7 at: https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl
Synced 7 W&B file(s), 1 media file(s), 127 artifact file(s) and 2 other file(s)" ], "text/plain": [ "" @@ -358,7 +342,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20230705_144549-dg3tar8b/logs" + "Find logs at: ./wandb/run-20230706_123525-7s54fntl/logs" ], "text/plain": [ "" @@ -383,14 +367,28 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 8, "id": "4f40520a-66f8-4415-9e36-174dda06aca0", "metadata": {}, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8acd0e8533e84ae88b608e748a904422", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670229483376413, max=1.0…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/html": [ - "Tracking run with wandb version 0.15.4" + "Tracking run with wandb version 0.15.5" ], "text/plain": [ "" @@ -402,7 +400,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /Users/tcapelle/work/dlai/wandb/run-20230705_144416-iysb84lz" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_123542-lbawks79" ], "text/plain": [ "" @@ -414,7 +412,7 @@ { "data": { "text/html": [ - "Syncing run icy-water-6 to Weights & Biases (docs)
" + "Syncing run fearless-bird-8 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -426,7 +424,7 @@ { "data": { "text/html": [ - " View project at https://wandb.ai/capecape/deeplearningai-intro" + " View project at https://wandb.ai/deeplearning-ai-temp/intro" ], "text/plain": [ "" @@ -438,7 +436,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz" + " View run at https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79" ], "text/plain": [ "" @@ -453,14 +451,14 @@ "text": [ "sprite shape: (89400, 16, 16, 3)\n", "labels shape: (89400,)\n", - "sprite shape: (1000, 16, 16, 3)\n", - "labels shape: (1000,)\n" + "sprite shape: (10000, 16, 16, 3)\n", + "labels shape: (10000,)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e576d9f253b6400d80f8022dafbdd326", + "model_id": "e8b441fe3ba44768b4b2215b68a437b6", "version_major": 2, "version_minor": 0 }, @@ -483,20 +481,6 @@ "metadata": {}, "output_type": "display_data" }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "7bd6b3a334c94ce58afd7c25e10d8d5a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='0.161 MB of 0.161 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ @@ -505,7 +489,7 @@ " .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n", " .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n", " \n", - "

Run history:


train/epoch▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▇▇▇██
train/example_ct▁▁▂▂▃▃▃▃▄▄▅▅▅▆▆▆▇▇███
train/train_loss█▆▅▄▄▃▃▂▂▂▂▁▂▂▁▁▁▁▁▁▁
val/val_accuracy▁██
val/val_loss█▂▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct2400
train/train_loss0.27209
val/val_accuracy0.92
val/val_loss0.32183

" + "

Run history:


train/epoch▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/example_ct▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/train_loss█▅▃▃▃▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/val_accuracy▁██
val/val_loss█▂▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct24000
train/train_loss0.02836
val/val_accuracy1.0
val/val_loss0.00944

" ], "text/plain": [ "" @@ -517,7 +501,7 @@ { "data": { "text/html": [ - " View run icy-water-6 at: https://wandb.ai/capecape/deeplearningai-intro/runs/iysb84lz
Synced 7 W&B file(s), 1 media file(s), 126 artifact file(s) and 2 other file(s)" + " View run fearless-bird-8 at: https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79
Synced 7 W&B file(s), 1 media file(s), 124 artifact file(s) and 2 other file(s)" ], "text/plain": [ "" @@ -529,7 +513,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20230705_144416-iysb84lz/logs" + "Find logs at: ./wandb/run-20230706_123542-lbawks79/logs" ], "text/plain": [ "" @@ -543,19 +527,11 @@ "config.dropout = 0.5\n", "train_model(config)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bba77c03-fcd2-43ef-9a11-8cebef617c23", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -569,7 +545,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.11.3" } }, "nbformat": 4, From e3ff1ff7cc9d1b5f59656503fd212170a602d150 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 12:38:14 +0000 Subject: [PATCH 10/43] use pathlib --- dlai/02_diffusion_sampling.ipynb | 73 ++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb index f831e4bb..96e13a72 100644 --- a/dlai/02_diffusion_sampling.ipynb +++ b/dlai/02_diffusion_sampling.ipynb @@ -14,13 +14,14 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 1, "id": "700e687c", "metadata": { "tags": [] }, "outputs": [], "source": [ + "from pathlib import Path\n", "import torch\n", "import torch.nn.functional as F\n", "import numpy as np\n", @@ -50,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 3, "id": "54c3a942", "metadata": { "tags": [] @@ -94,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 4, "id": "8ab66255", "metadata": {}, "outputs": [], @@ -103,13 +104,13 @@ " \"Load the model from wandb artifacts\"\n", " api = wandb.Api()\n", " artifact = api.artifact(model_artifact_name, type=\"model\")\n", - " model_path = artifact.download()\n", + " model_path = Path(artifact.download())\n", "\n", " # recover model info from the registry\n", " producer_run = artifact.logged_by()\n", "\n", " # load the weights dictionary\n", - " model_weights = torch.load(model_path + f\"/context_model.pth\", \n", + " model_weights = torch.load(model_path/\"context_model.pth\", \n", " map_location=\"cpu\")\n", "\n", " # create the model\n", @@ -128,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 5, "id": "b47633e2", "metadata": {}, "outputs": [ @@ -164,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 6, "id": "f6f479d1", "metadata": {}, "outputs": [], @@ -178,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 7, "id": "8b0f5bed", "metadata": {}, "outputs": [], @@ -204,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 8, "id": "16085a65", "metadata": {}, "outputs": [], @@ -241,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 9, "id": "d88afdba", "metadata": { "tags": [] @@ -272,7 +273,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 10, "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc", "metadata": {}, "outputs": [], @@ -291,7 +292,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 11, "id": "5fdfc048-47f0-43b5-983e-da715e1ed562", "metadata": { "tags": [] @@ -329,7 +330,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 12, "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2", "metadata": {}, "outputs": [], @@ -348,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 13, "id": "89e24210-4885-4559-92e1-db10566ef5ea", "metadata": {}, "outputs": [ @@ -375,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 14, "id": "25b07c26-0ac2-428a-8351-34f8b7228074", "metadata": {}, "outputs": [ @@ -393,7 +394,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 15, "id": "af33d3c4", "metadata": {}, "outputs": [], @@ -423,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 16, "id": "481afea1-ae53-4b5b-a3db-1d49be0733a3", "metadata": {}, "outputs": [], @@ -446,10 +447,18 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 17, "id": "bbc7a2ca-ae05-4462-9ae3-82eb1a6dbc27", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + ] + }, { "data": { "text/html": [ @@ -465,7 +474,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_122356-yw6l5eqs" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_123733-eztcx1w7" ], "text/plain": [ "" @@ -477,7 +486,7 @@ { "data": { "text/html": [ - "Syncing run earthy-firebrand-4 to Weights & Biases (docs)
" + "Syncing run fresh-frost-7 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -501,7 +510,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs" + " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7" ], "text/plain": [ "" @@ -525,7 +534,7 @@ { "data": { "text/html": [ - " View run earthy-firebrand-4 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yw6l5eqs
Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 1 other file(s)" + " View run fresh-frost-7 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7
Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 2 other file(s)" ], "text/plain": [ "" @@ -537,7 +546,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20230706_122356-yw6l5eqs/logs" + "Find logs at: ./wandb/run-20230706_123733-eztcx1w7/logs" ], "text/plain": [ "" @@ -563,7 +572,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 18, "id": "68987e12-22d1-4c40-b0d2-b33f6397c34a", "metadata": {}, "outputs": [ @@ -593,7 +602,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 19, "id": "626ef616-dae4-4417-9219-d67ef0794e63", "metadata": {}, "outputs": [], @@ -608,7 +617,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 20, "id": "dbeb2d18-feb9-4452-b368-3f7f03b1715d", "metadata": {}, "outputs": [], @@ -619,7 +628,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 21, "id": "8e196f2b-4d13-4cc2-a380-2f23530bee1c", "metadata": { "scrolled": true @@ -640,7 +649,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_122412-mlom51n4" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_123747-yk6jzo2x" ], "text/plain": [ "" @@ -652,7 +661,7 @@ { "data": { "text/html": [ - "Syncing run avid-haze-5 to Weights & Biases (docs)
" + "Syncing run misty-field-8 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -676,7 +685,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4" + " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x" ], "text/plain": [ "" @@ -700,7 +709,7 @@ { "data": { "text/html": [ - " View run avid-haze-5 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/mlom51n4
Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)" + " View run misty-field-8 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x
Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 2 other file(s)" ], "text/plain": [ "" @@ -712,7 +721,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20230706_122412-mlom51n4/logs" + "Find logs at: ./wandb/run-20230706_123747-yk6jzo2x/logs" ], "text/plain": [ "" From 964e9ea2aba50d1c0ee10c107831941094c6076c Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 13:11:09 +0000 Subject: [PATCH 11/43] 33M model, reasonable results --- dlai/04_train_llm.ipynb | 396 ++++++++++++++++++++++++---------------- 1 file changed, 239 insertions(+), 157 deletions(-) diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb index ae128ebc..3fbef4e5 100644 --- a/dlai/04_train_llm.ipynb +++ b/dlai/04_train_llm.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 40, + "execution_count": 1, "id": "fe0ed738-872b-4952-baf8-b3f7c92214a0", "metadata": {}, "outputs": [], @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 2, "id": "f64e095c-d28b-44aa-a122-4121c5c66a5b", "metadata": {}, "outputs": [], @@ -35,27 +35,93 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "a7535b8b-d220-44e8-a56c-97e250c36596", "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Found cached dataset parquet (/home/tcapelle/.cache/huggingface/datasets/MohamedRashad___parquet/MohamedRashad--characters_backstories-6398ba4bb1a6e421/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "899f1c4acc1a40d19459e9323bc75960", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/1 [00:00 2048). Running this sequence through the model will result in indexing errors\n", + "Token indices sequence length is longer than the specified maximum sequence length for this model (2812 > 2048). Running this sequence through the model will result in indexing errors\n", + "Token indices sequence length is longer than the specified maximum sequence length for this model (2573 > 2048). Running this sequence through the model will result in indexing errors\n", + "Token indices sequence length is longer than the specified maximum sequence length for this model (2952 > 2048). Running this sequence through the model will result in indexing errors\n" + ] + }, { "data": { "application/vnd.jupyter.widget-view+json": { @@ -132,60 +226,61 @@ "version_minor": 0 }, "text/plain": [ - "Map (num_proc=4): 0%| | 0/3760 [00:00 2048). Running this sequence through the model will result in indexing errors\n", + "Token indices sequence length is longer than the specified maximum sequence length for this model (4725 > 2048). Running this sequence through the model will result in indexing errors\n", + "Token indices sequence length is longer than the specified maximum sequence length for this model (2464 > 2048). Running this sequence through the model will result in indexing errors\n", + "Token indices sequence length is longer than the specified maximum sequence length for this model (3121 > 2048). Running this sequence through the model will result in indexing errors\n" + ] } ], "source": [ - "tokenized_datasets = ds.map(tokenize_function, batched=True, num_proc=4, remove_columns=[\"text\"])" + "tokenized_datasets = ds.map(tokenize_function, batched=True, num_proc=4, remove_columns=[\"text\", \"target\"])" ] }, { "cell_type": "code", - "execution_count": null, - "id": "f5880220-3b8b-414e-9e9a-6e6541784417", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 21, + "execution_count": 11, "id": "d59cc8a9-5f87-4eb7-abbc-f4fc18fea51d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'input_ids': [796, 569, 18354, 7496, 17740, 6711, 796, 220, 198],\n", - " 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}" + "[13]" ] }, - "execution_count": 21, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "tokenized_datasets[\"train\"][1]" + "tokenized_datasets[\"train\"][1][\"input_ids\"][0:10]" ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 12, "id": "70bc12ae-52dc-47ad-b9ef-1e5b8af829e8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "' = Valkyria Chronicles III = \\n'" + "'.'" ] }, - "execution_count": 43, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -196,19 +291,16 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 13, "id": "85c6ad00-3825-4f12-be49-8ff336d5d398", "metadata": {}, "outputs": [], "source": [ - "block_size = 128\n", + "block_size = 256\n", "\n", "def group_texts(examples):\n", - " # Concatenate all texts.\n", " concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}\n", " total_length = len(concatenated_examples[list(examples.keys())[0]])\n", - " # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can\n", - " # customize this part to your needs.\n", " total_length = (total_length // block_size) * block_size\n", " # Split by chunks of max_len.\n", " result = {\n", @@ -231,7 +323,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 14, "id": "572f29c8-84d3-45b9-b8df-26de8c22bc91", "metadata": {}, "outputs": [ @@ -243,21 +335,7 @@ "version_minor": 0 }, "text/plain": [ - "Map (num_proc=4): 0%| | 0/4358 [00:00/home/tcapelle/work/edu/dlai/wandb/run-20230706_104512-3u6izcp0" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Syncing run fallen-voice-1 to Weights & Biases (docs)
" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_130647-l0pa7ivo" ], "text/plain": [ "" @@ -451,7 +459,7 @@ { "data": { "text/html": [ - " View project at https://wandb.ai/anony-moose-57595088200203951/tiny-stories?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" + "Syncing run dulcet-cherry-3 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -463,7 +471,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/anony-moose-57595088200203951/tiny-stories/runs/3u6izcp0?apiKey=589a02605f9d15ff4213130afb0b28f2a2b08ba2" + " View project at https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters" ], "text/plain": [ "" @@ -475,7 +483,7 @@ { "data": { "text/html": [ - "Do NOT share these links with anyone. They can be used to claim your runs." + " View run at https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters/runs/l0pa7ivo" ], "text/plain": [ "" @@ -490,8 +498,8 @@ "\n", "
\n", " \n", - " \n", - " [7002/7002 03:51, Epoch 3/3]\n", + " \n", + " [816/816 01:26, Epoch 3/3]\n", "
\n", " \n", " \n", @@ -504,18 +512,18 @@ " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "
16.4719006.3740475.7958005.142995
25.8121006.1963224.8871005.009582
35.6599006.1530894.8869004.998519

" @@ -530,10 +538,10 @@ { "data": { "text/plain": [ - "TrainOutput(global_step=7002, training_loss=6.346599991952852, metrics={'train_runtime': 237.6024, 'train_samples_per_second': 235.679, 'train_steps_per_second': 29.469, 'total_flos': 17136527671296.0, 'train_loss': 6.346599991952852, 'epoch': 3.0})" + "TrainOutput(global_step=816, training_loss=5.0707503650702686, metrics={'train_runtime': 92.5014, 'train_samples_per_second': 70.572, 'train_steps_per_second': 8.821, 'total_flos': 284203589566464.0, 'train_loss': 5.0707503650702686, 'epoch': 3.0})" ] }, - "execution_count": 32, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -542,13 +550,87 @@ "trainer.train()" ] }, + { + "cell_type": "markdown", + "id": "0a247e01-35d0-454f-8b7e-5f24cdf66f33", + "metadata": {}, + "source": [ + "## Generate" + ] + }, { "cell_type": "code", - "execution_count": null, - "id": "36f0eef0-8729-4d32-a2aa-b1577847f7f5", + "execution_count": 21, + "id": "6f16d43d-445f-4df5-8734-85584f95792f", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "model = trainer.model\n", + "device = next(model.parameters()).device" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "7911e43f-f4ce-4855-9f68-662438af8d24", + "metadata": {}, + "outputs": [], + "source": [ + "prompt = \"The hero was half human and cat, his strenghts were\"\n", + "\n", + "input_ids = tokenizer.encode(prompt, return_tensors=\"pt\").to(device)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "844802b9-0ffc-466e-bedb-d7b7c6f337de", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 464, 4293, 373, 2063, 1692, 290, 3797, 11, 465, 43071,\n", + " 456, 912, 547]], device='cuda:0')" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "input_ids" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "e0883650-ab62-49c9-88d8-7f8c4fdfb0a9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", + "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The hero was half human and cat, his strenghts were the first to be. He was the only one who had a lot of power, and he was the only one who had a lot of power. He was a great wizard, and he was the only one who could do it. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great\n" + ] + } + ], + "source": [ + "output = model.generate(input_ids, max_length = 128, num_beams=1)\n", + "output_text = tokenizer.decode(output[0], skip_special_tokens=True)\n", + "\n", + "print(output_text)" + ] } ], "metadata": { From 1ee868344fad8344c20404bb646def32d51d43bb Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 13:12:15 +0000 Subject: [PATCH 12/43] update readme --- dlai/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlai/README.md b/dlai/README.md index a20f4a8d..13db46a9 100644 --- a/dlai/README.md +++ b/dlai/README.md @@ -8,6 +8,6 @@ We instrument various notebooks from the generative AI course with W&B to track - [01_diffusion_training](01_diffusion_training.ipynb) In this notebook we train a diffusion model to generate images from the Sprites dataset. We log the training metrics to W&B. We sample from the model and log the images to W&B. - [02_diffusion_sampling](02_diffusion_sampling.ipynb) In this notebook we sample from the trained model and log the images to W&B. We compare different sampling methods and log the results. - [03 LLM evaluation and debugging](03_llm_eval.ipynb) In this notebook we generate character names using LLMs and use W&B autologgers and Tracer to evaluate and debug our generations. -- [04 WIP]() We are planning to add a CPU-based LLM finetuning notebook with a small LLM finetuned for generating names +- [04 WIP](04_train_llm.ipynb) Finetunning and LLM on a character based dataset to create heros descriptions! -The W&B dashboard: https://wandb.ai/capecape/dlai_diffusion \ No newline at end of file +The W&B dashboard: https://wandb.ai/deeplearning-ai-temp \ No newline at end of file From 0eab3807e76c18c9242dcdff06e1e500f82ea407 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 13:57:52 +0000 Subject: [PATCH 13/43] trained model --- dlai/data/weights/context_model.pth | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 dlai/data/weights/context_model.pth diff --git a/dlai/data/weights/context_model.pth b/dlai/data/weights/context_model.pth new file mode 100644 index 00000000..451319cf --- /dev/null +++ b/dlai/data/weights/context_model.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51535239b6f3e953db8ff9631278c3e6b133a5a500780bda5092db620ca8f570 +size 5989463 From 0c2cf01eed73ee93ae162a164ca7bbf98cba29f5 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 13:58:13 +0000 Subject: [PATCH 14/43] HF stack --- dlai/requirements.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dlai/requirements.txt b/dlai/requirements.txt index 886e2b28..f8014cce 100644 --- a/dlai/requirements.txt +++ b/dlai/requirements.txt @@ -4,4 +4,10 @@ matplotlib pandas numpy wandb -tqdm \ No newline at end of file +tqdm +openai +tenacity +rich +transformers +datasets +accelerate \ No newline at end of file From 8666f90b9ddd45e6e53d8ab44dba6b1b50a7dfc1 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 13:58:39 +0000 Subject: [PATCH 15/43] latest on reg --- dlai/02_diffusion_sampling.ipynb | 70 ++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb index 96e13a72..26a12fc3 100644 --- a/dlai/02_diffusion_sampling.ipynb +++ b/dlai/02_diffusion_sampling.ipynb @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "54c3a942", "metadata": { "tags": [] @@ -60,7 +60,7 @@ "source": [ "# Wandb Params\n", "PROJECT = \"sprite_diffusion\"\n", - "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:v0\" \n", + "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:latest\" \n", "\n", "# ddpm sampler hyperparameters\n", "timesteps = 500\n", @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "8ab66255", "metadata": {}, "outputs": [], @@ -129,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "b47633e2", "metadata": {}, "outputs": [ @@ -165,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "f6f479d1", "metadata": {}, "outputs": [], @@ -179,7 +179,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "8b0f5bed", "metadata": {}, "outputs": [], @@ -205,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "16085a65", "metadata": {}, "outputs": [], @@ -242,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "d88afdba", "metadata": { "tags": [] @@ -273,7 +273,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc", "metadata": {}, "outputs": [], @@ -292,7 +292,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "5fdfc048-47f0-43b5-983e-da715e1ed562", "metadata": { "tags": [] @@ -330,7 +330,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2", "metadata": {}, "outputs": [], @@ -349,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "89e24210-4885-4559-92e1-db10566ef5ea", "metadata": {}, "outputs": [ @@ -376,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "25b07c26-0ac2-428a-8351-34f8b7228074", "metadata": {}, "outputs": [ @@ -394,7 +394,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "af33d3c4", "metadata": {}, "outputs": [], @@ -424,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "481afea1-ae53-4b5b-a3db-1d49be0733a3", "metadata": {}, "outputs": [], @@ -442,12 +442,12 @@ "id": "987cee86-2db1-4a2a-9d14-f70c6248ecb9", "metadata": {}, "source": [ - "we log the table to W&B, we can also use `wandb.init` as a context manager, this way we ensure that the run is finished when exiting the manager" + "we log the table to W&B, we can also use `wandb.init` as a context manager, this way we ensure that the run is finished when exiting the manager." ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "bbc7a2ca-ae05-4462-9ae3-82eb1a6dbc27", "metadata": {}, "outputs": [ @@ -474,7 +474,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_123733-eztcx1w7" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_135129-c1jaiuwv" ], "text/plain": [ "" @@ -486,7 +486,7 @@ { "data": { "text/html": [ - "Syncing run fresh-frost-7 to Weights & Biases (docs)
" + "Syncing run rare-thunder-9 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -510,7 +510,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7" + " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv" ], "text/plain": [ "" @@ -534,7 +534,7 @@ { "data": { "text/html": [ - " View run fresh-frost-7 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/eztcx1w7
Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 2 other file(s)" + " View run rare-thunder-9 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv
Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 1 other file(s)" ], "text/plain": [ "" @@ -546,7 +546,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20230706_123733-eztcx1w7/logs" + "Find logs at: ./wandb/run-20230706_135129-c1jaiuwv/logs" ], "text/plain": [ "" @@ -572,7 +572,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "68987e12-22d1-4c40-b0d2-b33f6397c34a", "metadata": {}, "outputs": [ @@ -602,7 +602,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "626ef616-dae4-4417-9219-d67ef0794e63", "metadata": {}, "outputs": [], @@ -617,7 +617,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "dbeb2d18-feb9-4452-b368-3f7f03b1715d", "metadata": {}, "outputs": [], @@ -628,7 +628,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "id": "8e196f2b-4d13-4cc2-a380-2f23530bee1c", "metadata": { "scrolled": true @@ -649,7 +649,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_123747-yk6jzo2x" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_135310-u90wajwk" ], "text/plain": [ "" @@ -661,7 +661,7 @@ { "data": { "text/html": [ - "Syncing run misty-field-8 to Weights & Biases (docs)
" + "Syncing run elated-eon-10 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -685,7 +685,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x" + " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk" ], "text/plain": [ "" @@ -709,7 +709,7 @@ { "data": { "text/html": [ - " View run misty-field-8 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/yk6jzo2x
Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 2 other file(s)" + " View run elated-eon-10 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk
Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)" ], "text/plain": [ "" @@ -721,7 +721,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20230706_123747-yk6jzo2x/logs" + "Find logs at: ./wandb/run-20230706_135310-u90wajwk/logs" ], "text/plain": [ "" @@ -735,6 +735,14 @@ "with wandb.init(project=PROJECT, job_type=\"sampling_mix\", config=config):\n", " wandb.log({\"hero_table\":hero_table})" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a21faa2f-f43a-40c3-9041-7d07d73a358e", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 5071595a2cf94e37a16e1f1552478764962cac3f Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Thu, 6 Jul 2023 13:58:57 +0000 Subject: [PATCH 16/43] pathlib --- dlai/01_diffusion_training.ipynb | 537 ++----------------------------- 1 file changed, 25 insertions(+), 512 deletions(-) diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index 9d3845e8..c7d5bf20 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -23,6 +23,7 @@ }, "outputs": [], "source": [ + "from pathlib import Path\n", "from tqdm.notebook import tqdm\n", "import torch\n", "import torch.nn.functional as F\n", @@ -55,7 +56,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "54c3a942", + "id": "3d76c167-7122-4f88-9c9f-5ded96684fa5", "metadata": { "tags": [] }, @@ -74,10 +75,10 @@ "n_feat = 64 # 64 hidden dimension feature\n", "n_cfeat = 5 # context vector is of size 5\n", "height = 16 # 16x16 image\n", - "data_dir = './data/'\n", - "save_dir = './data/weights/'\n", - "if not os.path.exists(save_dir):\n", - " os.mkdir(save_dir)\n", + "data_dir = Path('./data/')\n", + "save_dir = Path('./data/weights/')\n", + "save_dir.mkdir(exist_ok=True, parents=True)\n", + "\n", "\n", "# training hyperparameters\n", "batch_size = 100\n", @@ -156,7 +157,7 @@ ], "source": [ "# load dataset and construct optimizer\n", - "dataset = CustomDataset.from_np(data_dir + \"sprites_1788_16x16.npy\", data_dir + \"sprite_labels_nc_1788_16x16.npy\")\n", + "dataset = CustomDataset.from_np(data_dir/\"sprites_1788_16x16.npy\", data_dir/\"sprite_labels_nc_1788_16x16.npy\")\n", "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=1)\n", "optim = torch.optim.Adam(nn_model.parameters(), lr=lrate, eps=1e-5)" ] @@ -278,7 +279,7 @@ "source": [ "The following training cell takes very long to run on CPU, we have already trained the model for you on a GPU equipped machine.\n", "\n", - "### You can visit the result of this [training here](https://wandb.ai)" + "### You can visit the result of this [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua)" ] }, { @@ -310,7 +311,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_120421-lqf74fua" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_123702-2bkmjqyt" ], "text/plain": [ "" @@ -322,7 +323,7 @@ { "data": { "text/html": [ - "Syncing run daily-frost-1 to Weights & Biases (docs)
" + "Syncing run lemon-galaxy-6 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -346,7 +347,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua" + " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/2bkmjqyt" ], "text/plain": [ "" @@ -358,7 +359,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6aef0bb639f74bdf97d56b0c0cd1ffc5", + "model_id": "7369da274a8a448e8b4d47071261a2f1", "version_major": 2, "version_minor": 0 }, @@ -372,161 +373,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/894 [00:00(success)." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c2bd6d2f891d4c69a4c285591b2bc1e4", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='51.649 MB of 51.655 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=0.9998…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "

Run history:


epoch▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
loss█▆▅▅▄▃▃▃▃▃▄▃▃▃▃▃▃▂▂▂▃▂▂▂▂▂▂▂▂▃▃▂▂▁▁▁▂▂▁▁
lr████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁

Run summary:


epoch31
loss0.09235
lr3e-05

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run daily-frost-1 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua
Synced 6 W&B file(s), 288 media file(s), 9 artifact file(s) and 1 other file(s)" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Find logs at: ./wandb/run-20230706_120421-lqf74fua/logs" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[11], line 22\u001b[0m\n\u001b[1;32m 20\u001b[0m pred_noise \u001b[38;5;241m=\u001b[39m nn_model(x_pert, t \u001b[38;5;241m/\u001b[39m timesteps, c\u001b[38;5;241m=\u001b[39mc) \n\u001b[1;32m 21\u001b[0m loss \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39mmse_loss(pred_noise, noise)\n\u001b[0;32m---> 22\u001b[0m \u001b[43mloss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \n\u001b[1;32m 23\u001b[0m optim\u001b[38;5;241m.\u001b[39mstep()\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# we log the relevant metrics to the workspace\u001b[39;00m\n", + "File \u001b[0;32m~/mambaforge/envs/pt2/lib/python3.11/site-packages/torch/_tensor.py:487\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 477\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m 479\u001b[0m Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[1;32m 480\u001b[0m (\u001b[38;5;28mself\u001b[39m,),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 485\u001b[0m inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m 486\u001b[0m )\n\u001b[0;32m--> 487\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/mambaforge/envs/pt2/lib/python3.11/site-packages/torch/autograd/__init__.py:200\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 195\u001b[0m retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[1;32m 197\u001b[0m \u001b[38;5;66;03m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m 198\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 200\u001b[0m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m 201\u001b[0m \u001b[43m \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] } ], "source": [ @@ -918,7 +431,7 @@ " # save model periodically\n", " if ep%4==0 or ep == int(n_epoch-1):\n", " nn_model.eval()\n", - " ckpt_file = save_dir + f\"context_model.pth\"\n", + " ckpt_file = save_dir/f\"context_model.pth\"\n", " torch.save(nn_model.state_dict(), ckpt_file)\n", "\n", " # save model to wandb as an Artifact\n", From 1051a159ab398f4f4cb60776b8dd249308c57811 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Fri, 7 Jul 2023 08:32:12 +0000 Subject: [PATCH 17/43] checlpoitns --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index f1658105..76e5303f 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ __pycache__/ *$py.class .idea/ +/**/Tiny* From bfb5d630395731eb52f2cc8e36cfd0d340861eed Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Fri, 7 Jul 2023 09:23:54 +0000 Subject: [PATCH 18/43] split lines, move dataloader out --- dlai/00_intro.ipynb | 205 +++++++++++++++++++++++++++++--------------- dlai/utilities.py | 19 +++- 2 files changed, 154 insertions(+), 70 deletions(-) diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb index d858d6ce..0244b6fb 100644 --- a/dlai/00_intro.ipynb +++ b/dlai/00_intro.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 8, "id": "e9ba792c-2baa-4c19-a132-2ed82a759e79", "metadata": {}, "outputs": [], @@ -17,14 +17,13 @@ "import torch.nn as nn\n", "import torch.nn.functional as F\n", "from torch.optim import Adam\n", - "from torch.utils.data import DataLoader, Subset\n", "\n", - "from utilities import *\n" + "from utilities import *" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 9, "id": "803c37e2-7ff5-46a6-afb7-b80cb69f7501", "metadata": {}, "outputs": [], @@ -32,9 +31,17 @@ "# wandb.login() # uncomment if you want to login to wandb" ] }, + { + "cell_type": "markdown", + "id": "8e4aad93-5819-4304-afb5-d962ee3f5fed", + "metadata": {}, + "source": [ + "We will be running the notebook on `anonymous` mode" + ] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 10, "id": "d51a9f7f", "metadata": {}, "outputs": [], @@ -44,27 +51,11 @@ "HIDDEN_SIZE = 256\n", "NUM_WORKERS = 2\n", "CLASSES = [\"hero\", \"non-hero\", \"food\", \"spell\", \"side-facing\"]\n", - "\n", - "# Device\n", - "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", - "\n", - "data_dir = Path('./data/')\n", - "\n", - "def get_dataloaders(batch_size, slice_size=None, valid_pct=0.2):\n", - " dataset = CustomDataset.from_np(data_dir/\"sprites_1788_16x16.npy\", \n", - " data_dir/\"sprite_labels_nc_1788_16x16.npy\", argmax=True)\n", - "\n", - " if slice_size:\n", - " dataset = dataset.subset(slice_size)\n", - "\n", - " train_ds, valid_ds = dataset.split(valid_pct)\n", - "\n", - " train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=1) \n", - " valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False, num_workers=1)\n", - "\n", - " return train_dl, valid_dl\n", + "DATA_DIR = Path('./data/')\n", + "DEVICE = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "def get_model(dropout):\n", + " \"Simple MLP with Dropout\"\n", " return nn.Sequential(\n", " nn.Flatten(),\n", " nn.Linear(INPUT_SIZE, HIDDEN_SIZE),\n", @@ -72,12 +63,12 @@ " nn.ReLU(),\n", " nn.Dropout(dropout),\n", " nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE)\n", - " ).to(device)\n" + " ).to(DEVICE)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "id": "8401cf96", "metadata": {}, "outputs": [], @@ -90,7 +81,7 @@ "\n", " with torch.inference_mode():\n", " for i, (images, labels) in enumerate(valid_dl):\n", - " images, labels = images.to(device), labels.to(device)\n", + " images, labels = images.to(DEVICE), labels.to(DEVICE)\n", "\n", " # Forward pass\n", " outputs = model(images)\n", @@ -109,17 +100,18 @@ "\n", "def log_image_predictions_table(images, predicted, labels, probs):\n", " \"Create a wandb Table to log images, labels, and predictions\"\n", - " table = wandb.Table(columns=[\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)])\n", + " columns = [\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)]\n", + " table = wandb.Table(columns=columns)\n", " \n", " for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n", " table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n", " \n", - " wandb.log({\"predictions_table\": table}, commit=False)\n" + " wandb.log({\"predictions_table\": table}, commit=False)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 12, "id": "e5492ebb-2dfa-44ce-af6c-24655e45a2ed", "metadata": {}, "outputs": [], @@ -133,7 +125,10 @@ " )\n", "\n", " # Get the data\n", - " train_dl, valid_dl = get_dataloaders(config.batch_size, config.slice_size, config.valid_pct)\n", + " train_dl, valid_dl = get_dataloaders(DATA_DIR, \n", + " config.batch_size, \n", + " config.slice_size, \n", + " config.valid_pct)\n", " n_steps_per_epoch = math.ceil(len(train_dl.dataset) / config.batch_size)\n", "\n", " # A simple MLP model\n", @@ -149,7 +144,7 @@ " model.train()\n", "\n", " for step, (images, labels) in enumerate(train_dl):\n", - " images, labels = images.to(device), labels.to(device)\n", + " images, labels = images.to(DEVICE), labels.to(DEVICE)\n", "\n", " outputs = model(images)\n", " train_loss = loss_func(outputs, labels)\n", @@ -160,15 +155,14 @@ " example_ct += len(images)\n", " metrics = {\n", " \"train/train_loss\": train_loss,\n", - " \"train/epoch\": (step + 1 + (n_steps_per_epoch * epoch)) / n_steps_per_epoch,\n", + " \"train/epoch\": (step + 1 + (n_steps_per_epoch * epoch))/n_steps_per_epoch,\n", " \"train/example_ct\": example_ct\n", " }\n", + " wandb.log(metrics)\n", "\n", - " if step + 1 < n_steps_per_epoch:\n", - " # Log train metrics to wandb \n", - " wandb.log(metrics)\n", - " \n", - " val_loss, accuracy = validate_model(model, valid_dl, loss_func, log_images=(epoch == (config.epochs - 1)))\n", + " # compute validation metrics, log images on last epoch\n", + " val_loss, accuracy = validate_model(model, valid_dl, loss_func, \n", + " log_images=(epoch == (config.epochs - 1)))\n", "\n", " # Log train and validation metrics to wandb\n", " val_metrics = {\n", @@ -177,15 +171,12 @@ " }\n", " wandb.log({**metrics, **val_metrics})\n", "\n", - " # If you had a test set, this is how you could log it as a Summary metric\n", - " wandb.run.summary['test_accuracy'] = 0.8\n", - "\n", - " wandb.finish()\n" + " wandb.finish()" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "id": "f33f739c-d7ef-4954-ae87-d5bdd6bf25ee", "metadata": {}, "outputs": [], @@ -202,17 +193,97 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 14, "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6", "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" - ] + "data": { + "text/html": [ + "Finishing last run (ID:lskq5lst) before initializing another..." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Waiting for W&B process to finish... (success)." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c174ee7503f14722bee9a4d0db66a179", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(Label(value='0.062 MB of 0.062 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run robust-salad-10 at: https://wandb.ai/deeplearning-ai-temp/intro/runs/lskq5lst
Synced 7 W&B file(s), 0 media file(s), 0 artifact file(s) and 2 other file(s)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Find logs at: ./wandb/run-20230707_091843-lskq5lst/logs" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Successfully finished last run (ID:lskq5lst). Initializing new run:
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d2250f5729b647df90ebc0b6830c5d55", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670081783331625, max=1.0…" + ] + }, + "metadata": {}, + "output_type": "display_data" }, { "data": { @@ -229,7 +300,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_123525-7s54fntl" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230707_091936-7ibfofq5" ], "text/plain": [ "" @@ -241,7 +312,7 @@ { "data": { "text/html": [ - "Syncing run clear-sound-7 to Weights & Biases (docs)
" + "Syncing run ruby-voice-11 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -265,7 +336,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl" + " View run at https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5" ], "text/plain": [ "" @@ -287,7 +358,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e77b2cb6e7094dec858054a0ef68c5d3", + "model_id": "02a9448faca44d53b9ae82b2db4ba30d", "version_major": 2, "version_minor": 0 }, @@ -318,7 +389,7 @@ " .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n", " .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n", " \n", - "

Run history:


train/epoch▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/example_ct▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/train_loss█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/val_accuracy▁▅█
val/val_loss█▃▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct24000
train/train_loss0.0144
val/val_accuracy1.0
val/val_loss0.00669

" + "

Run history:


train/epoch▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/example_ct▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/train_loss█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/val_accuracy▁██
val/val_loss█▂▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct24000
train/train_loss0.00699
val/val_accuracy1.0
val/val_loss0.00692

" ], "text/plain": [ "" @@ -330,7 +401,7 @@ { "data": { "text/html": [ - " View run clear-sound-7 at: https://wandb.ai/deeplearning-ai-temp/intro/runs/7s54fntl
Synced 7 W&B file(s), 1 media file(s), 127 artifact file(s) and 2 other file(s)" + " View run ruby-voice-11 at: https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5
Synced 7 W&B file(s), 1 media file(s), 122 artifact file(s) and 1 other file(s)" ], "text/plain": [ "" @@ -342,7 +413,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20230706_123525-7s54fntl/logs" + "Find logs at: ./wandb/run-20230707_091936-7ibfofq5/logs" ], "text/plain": [ "" @@ -367,19 +438,19 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 15, "id": "4f40520a-66f8-4415-9e36-174dda06aca0", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8acd0e8533e84ae88b608e748a904422", + "model_id": "264ff7030aa5490daaab7d7f573f268d", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670229483376413, max=1.0…" + "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670244300000074, max=1.0…" ] }, "metadata": {}, @@ -400,7 +471,7 @@ { "data": { "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_123542-lbawks79" + "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230707_092004-5pt6qfs9" ], "text/plain": [ "" @@ -412,7 +483,7 @@ { "data": { "text/html": [ - "Syncing run fearless-bird-8 to Weights & Biases (docs)
" + "Syncing run zesty-oath-12 to Weights & Biases (docs)
" ], "text/plain": [ "" @@ -436,7 +507,7 @@ { "data": { "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79" + " View run at https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9" ], "text/plain": [ "" @@ -458,7 +529,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e8b441fe3ba44768b4b2215b68a437b6", + "model_id": "56d2c401c89f4eb797175dff9ddbf7d0", "version_major": 2, "version_minor": 0 }, @@ -489,7 +560,7 @@ " .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n", " .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n", " \n", - "

Run history:


train/epoch▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/example_ct▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/train_loss█▅▃▃▃▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/val_accuracy▁██
val/val_loss█▂▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct24000
train/train_loss0.02836
val/val_accuracy1.0
val/val_loss0.00944

" + "

Run history:


train/epoch▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/example_ct▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/train_loss█▅▃▂▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/val_accuracy▁▆█
val/val_loss█▃▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct24000
train/train_loss0.01679
val/val_accuracy1.0
val/val_loss0.00977

" ], "text/plain": [ "" @@ -501,7 +572,7 @@ { "data": { "text/html": [ - " View run fearless-bird-8 at: https://wandb.ai/deeplearning-ai-temp/intro/runs/lbawks79
Synced 7 W&B file(s), 1 media file(s), 124 artifact file(s) and 2 other file(s)" + " View run zesty-oath-12 at: https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9
Synced 7 W&B file(s), 1 media file(s), 124 artifact file(s) and 2 other file(s)" ], "text/plain": [ "" @@ -513,7 +584,7 @@ { "data": { "text/html": [ - "Find logs at: ./wandb/run-20230706_123542-lbawks79/logs" + "Find logs at: ./wandb/run-20230707_092004-5pt6qfs9/logs" ], "text/plain": [ "" diff --git a/dlai/utilities.py b/dlai/utilities.py index 29ce5bc9..561ee270 100644 --- a/dlai/utilities.py +++ b/dlai/utilities.py @@ -1,5 +1,6 @@ import os import random +from pathlib import Path import matplotlib.pyplot as plt import numpy as np @@ -8,7 +9,7 @@ import torchvision.transforms as transforms from matplotlib.animation import FuncAnimation, PillowWriter from PIL import Image -from torch.utils.data import Dataset +from torch.utils.data import Dataset, DataLoader from torchvision.utils import make_grid, save_image @@ -334,8 +335,6 @@ def __init__(self, sprites, slabels, transform=default_tfms, null_context=False, self.slabels = np.argmax(slabels, axis=1) else: self.slabels = slabels - print(f"sprite shape: {self.sprites.shape}") - print(f"labels shape: {self.slabels.shape}") self.transform = transform self.null_context = null_context @@ -373,4 +372,18 @@ def split(self, pct=0.2): train_dataset, test_dataset = torch.utils.data.random_split(self, [train_size, test_size]) return train_dataset, test_dataset +def get_dataloaders(data_dir, batch_size, slice_size=None, valid_pct=0.2): + "Get train/val dataloaders for classification on sprites dataset" + dataset = CustomDataset.from_np(Path(data_dir)/"sprites_1788_16x16.npy", + Path(data_dir)/"sprite_labels_nc_1788_16x16.npy", + argmax=True) + if slice_size: + dataset = dataset.subset(slice_size) + + train_ds, valid_ds = dataset.split(valid_pct) + + train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=1) + valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False, num_workers=1) + + return train_dl, valid_dl From bc665272da43e5e92c40034bc2f325657521e922 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Fri, 7 Jul 2023 09:37:17 +0000 Subject: [PATCH 19/43] SimpleNamespace isort --- dlai/01_diffusion_training.ipynb | 20 +++++++++++++++----- dlai/02_diffusion_sampling.ipynb | 4 +++- dlai/utilities.py | 2 +- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index c7d5bf20..6b038bb3 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -23,6 +23,7 @@ }, "outputs": [], "source": [ + "from types import SimpleNamespace\n", "from pathlib import Path\n", "from tqdm.notebook import tqdm\n", "import torch\n", @@ -63,7 +64,7 @@ "outputs": [], "source": [ "# hyperparameters\n", - "num_samples = 32\n", + "num_samples = 30\n", "\n", "# diffusion hyperparameters\n", "timesteps = 500\n", @@ -86,7 +87,7 @@ "lrate=1e-3\n", "\n", "# we are storing the parameters in a dictionary to be logged to wandb\n", - "config = dict(\n", + "config = SimpleNamespace(\n", " num_samples=num_samples,\n", " timesteps=timesteps,\n", " beta1=beta1,\n", @@ -269,17 +270,23 @@ "noises = torch.randn(num_samples, 3, height, height).to(device) \n", "\n", "# A fixed context vector to sample from\n", - "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4]), 5).to(device=device).float()" + "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n", + " 1,1,1,1,1,1,\n", + " 2,2,2,2,2,2,\n", + " 3,3,3,3,3,3,\n", + " 4,4,4,4,4,4]), \n", + " 5).to(device=device).float()" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "26765a7e-4ddc-449e-95c3-54c58a564738", "metadata": {}, "source": [ "The following training cell takes very long to run on CPU, we have already trained the model for you on a GPU equipped machine.\n", "\n", - "### You can visit the result of this [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua)" + "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua) <<" ] }, { @@ -401,7 +408,10 @@ "source": [ "# same code as before, added comments on the extra W&B instrumentation lines\n", "# create a wandb run\n", - "run = wandb.init(project=\"sprite_diffusion\", job_type=\"train\", anonymous=\"allow\", config=config)\n", + "run = wandb.init(project=\"sprite_diffusion\", \n", + " job_type=\"train\", \n", + " anonymous=\"allow\", \n", + " config=config)\n", "\n", "for ep in tqdm(range(n_epoch), leave=True, total=n_epoch):\n", " # set into train mode\n", diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb index 26a12fc3..45ed8425 100644 --- a/dlai/02_diffusion_sampling.ipynb +++ b/dlai/02_diffusion_sampling.ipynb @@ -21,6 +21,7 @@ }, "outputs": [], "source": [ + "from types import SimpleNamespace\n", "from pathlib import Path\n", "import torch\n", "import torch.nn.functional as F\n", @@ -73,7 +74,7 @@ "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n", "\n", "# we are storing the parameters in a dictionary to be logged to wandb\n", - "config = dict(\n", + "config = SimpleNamespace(\n", " timesteps=timesteps,\n", " beta1=beta1,\n", " beta2=beta2,\n", @@ -194,6 +195,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "895b2bbc-46cd-4be8-a672-e84ef69bc329", "metadata": {}, diff --git a/dlai/utilities.py b/dlai/utilities.py index 561ee270..a57955f4 100644 --- a/dlai/utilities.py +++ b/dlai/utilities.py @@ -9,7 +9,7 @@ import torchvision.transforms as transforms from matplotlib.animation import FuncAnimation, PillowWriter from PIL import Image -from torch.utils.data import Dataset, DataLoader +from torch.utils.data import DataLoader, Dataset from torchvision.utils import make_grid, save_image From aa65e53df91388d274d422d05800a2a88518e87e Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Fri, 7 Jul 2023 09:37:31 +0000 Subject: [PATCH 20/43] delete old checkpoint --- dlai/data/weights/context_model_trained.pth | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 dlai/data/weights/context_model_trained.pth diff --git a/dlai/data/weights/context_model_trained.pth b/dlai/data/weights/context_model_trained.pth deleted file mode 100644 index 451319cf..00000000 --- a/dlai/data/weights/context_model_trained.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:51535239b6f3e953db8ff9631278c3e6b133a5a500780bda5092db620ca8f570 -size 5989463 From d202631d5ecd4237a3e8d145134e3260228174e2 Mon Sep 17 00:00:00 2001 From: kldarek Date: Tue, 11 Jul 2023 12:38:18 +0200 Subject: [PATCH 21/43] intro nb --- dlai/00_intro.ipynb | 554 +++++++++----------------------------------- 1 file changed, 115 insertions(+), 439 deletions(-) diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb index 0244b6fb..ac282235 100644 --- a/dlai/00_intro.ipynb +++ b/dlai/00_intro.ipynb @@ -1,8 +1,19 @@ { "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dbfb9335", + "metadata": {}, + "source": [ + "# Introduction to W&B\n", + "\n", + "We will add `wandb` to sprite classification model training, so that we can track and visualize important metrics, gain insights into our model's behavior and make informed decisions for model improvements. We will also see how to compare and analyze different experiments, collaborate with team members, and reproduce results effectively." + ] + }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "e9ba792c-2baa-4c19-a132-2ed82a759e79", "metadata": {}, "outputs": [], @@ -21,9 +32,19 @@ "from utilities import *" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "c4cac7d2", + "metadata": {}, + "source": [ + "### W&B account\n", + "[Sign up](https://wandb.ai/site) for a free account at https://wandb.ai/site and then login to your wandb account to store the results of your experiments and use advanced W&B features. You can also continue to learn in anonymous mode. " + ] + }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "803c37e2-7ff5-46a6-afb7-b80cb69f7501", "metadata": {}, "outputs": [], @@ -32,16 +53,19 @@ ] }, { + "attachments": {}, "cell_type": "markdown", - "id": "8e4aad93-5819-4304-afb5-d962ee3f5fed", + "id": "2e0bfcc9", "metadata": {}, "source": [ - "We will be running the notebook on `anonymous` mode" + "### Sprite classification\n", + "\n", + "We will build a simple model to classify sprites. " ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "d51a9f7f", "metadata": {}, "outputs": [], @@ -68,50 +92,25 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "8401cf96", + "execution_count": null, + "id": "f33f739c-d7ef-4954-ae87-d5bdd6bf25ee", "metadata": {}, "outputs": [], "source": [ - "def validate_model(model, valid_dl, loss_func, log_images=False, batch_idx=0):\n", - " \"Compute the performance of the model on the validation dataset and log a wandb.Table\"\n", - " model.eval()\n", - " val_loss = 0.0\n", - " correct = 0\n", - "\n", - " with torch.inference_mode():\n", - " for i, (images, labels) in enumerate(valid_dl):\n", - " images, labels = images.to(DEVICE), labels.to(DEVICE)\n", - "\n", - " # Forward pass\n", - " outputs = model(images)\n", - " val_loss += loss_func(outputs, labels) * labels.size(0)\n", - "\n", - " # Compute accuracy and accumulate\n", - " _, predicted = torch.max(outputs.data, 1)\n", - " correct += (predicted == labels).sum().item()\n", - "\n", - " # Log one batch of images to the dashboard, always same batch_idx.\n", - " if i == batch_idx and log_images:\n", - " log_image_predictions_table(images, predicted, labels, outputs.softmax(dim=1))\n", - "\n", - " return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n", - "\n", - "\n", - "def log_image_predictions_table(images, predicted, labels, probs):\n", - " \"Create a wandb Table to log images, labels, and predictions\"\n", - " columns = [\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)]\n", - " table = wandb.Table(columns=columns)\n", - " \n", - " for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n", - " table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n", - " \n", - " wandb.log({\"predictions_table\": table}, commit=False)" + "# Let's define a config object to store our hyperparameters\n", + "config = SimpleNamespace(\n", + " epochs = 2,\n", + " batch_size = 128,\n", + " lr = 1e-5,\n", + " dropout = 0.5,\n", + " slice_size = 10_000,\n", + " valid_pct = 0.2,\n", + ")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "e5492ebb-2dfa-44ce-af6c-24655e45a2ed", "metadata": {}, "outputs": [], @@ -119,7 +118,7 @@ "def train_model(config):\n", " \"Train a model with a given config\"\n", " wandb.init(\n", - " project=\"intro\",\n", + " project=\"dlai-intro\",\n", " config=config,\n", " anonymous=\"allow\",\n", " )\n", @@ -176,253 +175,63 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "f33f739c-d7ef-4954-ae87-d5bdd6bf25ee", + "execution_count": null, + "id": "8401cf96", "metadata": {}, "outputs": [], "source": [ - "config = SimpleNamespace(\n", - " epochs = 3,\n", - " batch_size = 128,\n", - " lr = 1e-3,\n", - " dropout = 0.1,\n", - " slice_size = 10_000,\n", - " valid_pct = 0.2,\n", - ")" + "def validate_model(model, valid_dl, loss_func, log_images=False, batch_idx=0):\n", + " \"Compute the performance of the model on the validation dataset and log a wandb.Table\"\n", + " model.eval()\n", + " val_loss = 0.0\n", + " correct = 0\n", + "\n", + " with torch.inference_mode():\n", + " for i, (images, labels) in enumerate(valid_dl):\n", + " images, labels = images.to(DEVICE), labels.to(DEVICE)\n", + "\n", + " # Forward pass\n", + " outputs = model(images)\n", + " val_loss += loss_func(outputs, labels) * labels.size(0)\n", + "\n", + " # Compute accuracy and accumulate\n", + " _, predicted = torch.max(outputs.data, 1)\n", + " correct += (predicted == labels).sum().item()\n", + "\n", + " # Log one batch of images to the dashboard, always same batch_idx.\n", + " if i == batch_idx and log_images:\n", + " log_image_predictions_table(images, predicted, labels, outputs.softmax(dim=1))\n", + "\n", + " return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n", + "\n", + "\n", + "def log_image_predictions_table(images, predicted, labels, probs):\n", + " \"Create a wandb Table to log images, labels, and predictions\"\n", + " columns = [\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)]\n", + " table = wandb.Table(columns=columns)\n", + " \n", + " for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n", + " table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n", + " \n", + " wandb.log({\"predictions_table\": table}, commit=False)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "b3df2485", + "metadata": {}, + "source": [ + "### Train model\n", + "Let's train the model with default config and check how it's doing in W&B. " ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Finishing last run (ID:lskq5lst) before initializing another..." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Waiting for W&B process to finish... (success)." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c174ee7503f14722bee9a4d0db66a179", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='0.062 MB of 0.062 MB uploaded (0.000 MB deduped)\\r'), FloatProgress(value=1.0, max…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run robust-salad-10 at: https://wandb.ai/deeplearning-ai-temp/intro/runs/lskq5lst
Synced 7 W&B file(s), 0 media file(s), 0 artifact file(s) and 2 other file(s)" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Find logs at: ./wandb/run-20230707_091843-lskq5lst/logs" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Successfully finished last run (ID:lskq5lst). Initializing new run:
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "d2250f5729b647df90ebc0b6830c5d55", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670081783331625, max=1.0…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Tracking run with wandb version 0.15.5" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230707_091936-7ibfofq5" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Syncing run ruby-voice-11 to Weights & Biases (docs)
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View project at https://wandb.ai/deeplearning-ai-temp/intro" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sprite shape: (89400, 16, 16, 3)\n", - "labels shape: (89400,)\n", - "sprite shape: (10000, 16, 16, 3)\n", - "labels shape: (10000,)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "02a9448faca44d53b9ae82b2db4ba30d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/3 [00:00(success)." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "

Run history:


train/epoch▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/example_ct▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/train_loss█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/val_accuracy▁██
val/val_loss█▂▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct24000
train/train_loss0.00699
val/val_accuracy1.0
val/val_loss0.00692

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run ruby-voice-11 at: https://wandb.ai/deeplearning-ai-temp/intro/runs/7ibfofq5
Synced 7 W&B file(s), 1 media file(s), 122 artifact file(s) and 1 other file(s)" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Find logs at: ./wandb/run-20230707_091936-7ibfofq5/logs" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "train_model(config)" ] @@ -433,176 +242,43 @@ "id": "e9ecf01d", "metadata": {}, "source": [ - "Let's try with another value of dropout:" + "Let's try with other values of learning rate:" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "4f40520a-66f8-4415-9e36-174dda06aca0", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "264ff7030aa5490daaab7d7f573f268d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "VBox(children=(Label(value='Waiting for wandb.init()...\\r'), FloatProgress(value=0.016670244300000074, max=1.0…" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Tracking run with wandb version 0.15.5" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230707_092004-5pt6qfs9" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Syncing run zesty-oath-12 to Weights & Biases (docs)
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View project at https://wandb.ai/deeplearning-ai-temp/intro" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sprite shape: (89400, 16, 16, 3)\n", - "labels shape: (89400,)\n", - "sprite shape: (10000, 16, 16, 3)\n", - "labels shape: (10000,)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "56d2c401c89f4eb797175dff9ddbf7d0", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/3 [00:00(success)." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "

Run history:


train/epoch▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/example_ct▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/train_loss█▅▃▂▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/val_accuracy▁▆█
val/val_loss█▃▁

Run summary:


test_accuracy0.8
train/epoch3.0
train/example_ct24000
train/train_loss0.01679
val/val_accuracy1.0
val/val_loss0.00977

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run zesty-oath-12 at: https://wandb.ai/deeplearning-ai-temp/intro/runs/5pt6qfs9
Synced 7 W&B file(s), 1 media file(s), 124 artifact file(s) and 2 other file(s)" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Find logs at: ./wandb/run-20230707_092004-5pt6qfs9/logs" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "config.dropout = 0.5\n", + "config.lr = 1e-4\n", "train_model(config)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09008d54", + "metadata": {}, + "outputs": [], + "source": [ + "config.lr = 1e-3\n", + "train_model(config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d83ea0a", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -616,7 +292,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.8.13" } }, "nbformat": 4, From d46b3fab85a358eba43f9e790334d35e0e331114 Mon Sep 17 00:00:00 2001 From: kldarek Date: Tue, 11 Jul 2023 18:22:38 +0200 Subject: [PATCH 22/43] nb 03 --- dlai/03_llm_eval.ipynb | 139 ++++++++++++++++++++++++++++++++--------- 1 file changed, 109 insertions(+), 30 deletions(-) diff --git a/dlai/03_llm_eval.ipynb b/dlai/03_llm_eval.ipynb index 797527c1..c0a4aa7c 100644 --- a/dlai/03_llm_eval.ipynb +++ b/dlai/03_llm_eval.ipynb @@ -13,15 +13,16 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "53c0d4d6-3d2b-45e5-90fa-ba7953496ec2", "metadata": {}, "source": [ - "# LLM Tracing with W&B\n", + "# LLM Evaluation and Tracing with W&B\n", "\n", - "## 1. Auto-logging\n", + "## 1. Using Tables for Evaluation\n", "\n", - "In this section, we will call OpenAI LLM to generate names of our game assets. We will use W&B autologging, also available for other popular LLMs and libraries such as Cohere or HuggingFace Pipelines. " + "In this section, we will call OpenAI LLM to generate names of our game assets. We will use W&B Tables to evaluate the generations. " ] }, { @@ -38,15 +39,12 @@ "\n", "import openai\n", "\n", - "from rich.markdown import Markdown\n", - "import pandas as pd\n", "from tenacity import (\n", " retry,\n", " stop_after_attempt,\n", " wait_random_exponential, # for exponential backoff\n", ") \n", "import wandb\n", - "from wandb.integration.openai import autolog\n", "from wandb_addons.prompts import Trace" ] }, @@ -57,39 +55,50 @@ "metadata": {}, "outputs": [], "source": [ - "PROJECT = \"deeplearningai-llm\"" + "PROJECT = \"dlai-llm\"\n", + "MODEL_NAME = \"gpt-3.5-turbo\"" ] }, { "cell_type": "code", "execution_count": null, - "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b", + "id": "bb575380", "metadata": {}, "outputs": [], "source": [ - "autolog({\"project\":PROJECT, \"job_type\": \"generation\", \"anonymous\":\"allow\"})" + "# wandb.login() # uncomment if you want to login to wandb" ] }, { "cell_type": "code", "execution_count": null, - "id": "b2ab394b-295b-4cfa-aade-aa274003a56a", + "id": "7c304c2b-dcd8-463c-aba4-aa47094dc16b", "metadata": {}, "outputs": [], "source": [ - "@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))\n", - "def completion_with_backoff(**kwargs):\n", - " return openai.ChatCompletion.create(**kwargs)" + "run = wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4e7bcf11", + "metadata": {}, + "source": [ + "### Simple generations\n", + "Let's start by generating names for our game assets using OpenAI `ChatCompletion`, and saving the resulting generations in W&B Tables. " ] }, { "cell_type": "code", "execution_count": null, - "id": "076e62a1-188f-47e1-bda9-5e3619e7d4ba", + "id": "b2ab394b-295b-4cfa-aade-aa274003a56a", "metadata": {}, "outputs": [], "source": [ - "MODEL_NAME = \"gpt-3.5-turbo\"" + "@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))\n", + "def completion_with_backoff(**kwargs):\n", + " return openai.ChatCompletion.create(**kwargs)" ] }, { @@ -99,19 +108,31 @@ "metadata": {}, "outputs": [], "source": [ - "def generate_and_print(system_prompt, user_prompt, n=5):\n", + "def generate_and_print(system_prompt, user_prompt, table, n=5):\n", " messages=[\n", " {\"role\": \"system\", \"content\": system_prompt},\n", " {\"role\": \"user\", \"content\": user_prompt},\n", " ]\n", + " start_time = time.time()\n", " responses = completion_with_backoff(\n", " model=MODEL_NAME,\n", " messages=messages,\n", " n = n,\n", " )\n", + " elapsed_time = time.time() - start_time\n", " for response in responses.choices:\n", " generation = response.message.content\n", - " display(Markdown(generation))" + " print(generation)\n", + " table.add_data(system_prompt,\n", + " user_prompt,\n", + " [response.message.content for response in responses.choices],\n", + " elapsed_time,\n", + " datetime.datetime.fromtimestamp(responses.created),\n", + " responses.model,\n", + " responses.usage.prompt_tokens,\n", + " responses.usage.completion_tokens,\n", + " responses.usage.total_tokens\n", + " )" ] }, { @@ -123,9 +144,32 @@ "source": [ "system_prompt = \"\"\"You are a creative copywriter.\n", "You're given a category of game asset, and your goal is to design a name of that asset.\n", - "The game is set in a fantasy world where everyone laughs and respects each other, while celebrating diversity.\"\"\"\n", + "The game is set in a fantasy world where everyone laughs and respects each other, \n", + "while celebrating diversity.\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "395880fa", + "metadata": {}, + "outputs": [], + "source": [ + "# Define W&B Table to store generations\n", + "columns = [\"system_prompt\", \"user_prompt\", \"generations\", \"elapsed_time\", \"timestamp\",\\\n", + " \"model\", \"prompt_tokens\", \"completion_tokens\", \"total_tokens\"]\n", + "table = wandb.Table(columns=columns)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6fb07587", + "metadata": {}, + "outputs": [], + "source": [ "user_prompt = \"hero\"\n", - "generate_and_print(system_prompt, user_prompt)" + "generate_and_print(system_prompt, user_prompt, table)" ] }, { @@ -136,7 +180,7 @@ "outputs": [], "source": [ "user_prompt = \"jewel\"\n", - "generate_and_print(system_prompt, user_prompt)" + "generate_and_print(system_prompt, user_prompt, table)" ] }, { @@ -146,17 +190,19 @@ "metadata": {}, "outputs": [], "source": [ - "wandb.finish()" + "wandb.log({\"simple_generations\": table})\n", + "run.finish()" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "16d6d513-389d-4c67-a942-a922bce6ff1a", "metadata": {}, "source": [ "## 2. Using Tracer to log more complex chains\n", "\n", - "How can we get more creative outputs? Let's design an LLM chain that will first randomly pick a fantasy world, and then generate character names. We will demonstrate how to use Tracer in such scenario." + "How can we get more creative outputs? Let's design an LLM chain that will first randomly pick a fantasy world, and then generate character names. We will demonstrate how to use Tracer in such scenario. We will log the inputs and outputs, start and end times, whether the OpenAI call was successful, the token usage, and additional metadata." ] }, { @@ -184,7 +230,8 @@ "model_name = \"gpt-3.5-turbo\"\n", "temperature = 0.7\n", "system_message = \"\"\"You are a creative copywriter. \n", - "You're given a category of game asset, a fantasy world, and your goal is to design a name of that asset.\n", + "You're given a category of game asset and a fantasy world.\n", + "Your goal is to design a name of that asset.\n", "Provide the resulting name only, no additional description.\n", "Single name, max 3 words output, remember!\"\"\"" ] @@ -204,7 +251,9 @@ " name=\"MyCreativeChain\",\n", " kind=\"chain\",\n", " start_time_ms=start_time_ms,\n", - " metadata={\"user\": \"student_1\"})\n", + " metadata={\"user\": \"student_1\"},\n", + " model_dict={\"_kind\": \"CreativeChain\"}\n", + " )\n", "\n", " # part 2 - your chain picks a fantasy world\n", " time.sleep(3)\n", @@ -220,7 +269,9 @@ " start_time_ms=start_time_ms,\n", " end_time_ms=tool_end_time_ms,\n", " inputs={\"input\": query},\n", - " outputs={\"result\": expanded_prompt})\n", + " outputs={\"result\": expanded_prompt},\n", + " model_dict={\"_kind\": \"tool\", \"num_worlds\": len(worlds)}\n", + " )\n", "\n", " # add the TOOL span as a child of the root\n", " root_span.add_child(tool_span)\n", @@ -233,6 +284,7 @@ "\n", " response = openai.ChatCompletion.create(model=model_name,\n", " messages=messages,\n", + " max_tokens=12,\n", " temperature=temperature) \n", "\n", " llm_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)\n", @@ -250,6 +302,7 @@ " end_time_ms=llm_end_time_ms,\n", " inputs={\"system_prompt\":system_message, \"query\":expanded_prompt},\n", " outputs={\"response\": response_text},\n", + " model_dict={\"_kind\": \"Openai\", \"engine\": response[\"model\"], \"model\": response[\"object\"]}\n", " )\n", "\n", " # add the LLM span as a child of the Chain span...\n", @@ -275,8 +328,17 @@ "metadata": {}, "outputs": [], "source": [ - "wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")\n", - "\n", + "# Let's start a new wandb run\n", + "wandb.init(project=PROJECT, job_type=\"generation\", anonymous=\"allow\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7409a004", + "metadata": {}, + "outputs": [], + "source": [ "run_creative_chain(\"hero\")" ] }, @@ -301,6 +363,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1ccc075f-32bf-4451-b7ad-ab2a49cc86b6", "metadata": {}, @@ -377,7 +440,7 @@ " self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None\n", " ) -> str:\n", " \"\"\"Use the tool asynchronously.\"\"\"\n", - " raise NotImplementedError(\"custom_search does not support async\")\n", + " raise NotImplementedError(\"pick_world does not support async\")\n", " \n", "class NameValidatorTool(BaseTool):\n", " name = \"validate_name\"\n", @@ -397,7 +460,7 @@ " self, query: str, run_manager: Optional[AsyncCallbackManagerForToolRun] = None\n", " ) -> str:\n", " \"\"\"Use the tool asynchronously.\"\"\"\n", - " raise NotImplementedError(\"custom_search does not support async\")" + " raise NotImplementedError(\"validate_name does not support async\")" ] }, { @@ -407,7 +470,7 @@ "metadata": {}, "outputs": [], "source": [ - "llm = ChatOpenAI(temperature=0)" + "llm = ChatOpenAI(temperature=0.7)" ] }, { @@ -476,6 +539,22 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643f6295", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93462bd0", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 63a90742f41eb9a0a4e9a71502142ac00d0836ea Mon Sep 17 00:00:00 2001 From: kldarek Date: Tue, 11 Jul 2023 22:14:01 +0000 Subject: [PATCH 23/43] nb 04 --- dlai/04_train_llm.ipynb | 581 ++++++++-------------------------------- 1 file changed, 105 insertions(+), 476 deletions(-) diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb index 3fbef4e5..ccb7b550 100644 --- a/dlai/04_train_llm.ipynb +++ b/dlai/04_train_llm.ipynb @@ -1,18 +1,17 @@ { "cells": [ { - "cell_type": "code", - "execution_count": 1, - "id": "fe0ed738-872b-4952-baf8-b3f7c92214a0", + "cell_type": "markdown", + "id": "1dfae479-9399-492d-acaa-d9751615ee86", "metadata": {}, - "outputs": [], "source": [ - "# !pip install transformers accelerate dataset" + "# Finetuning a language model\n", + "Let's see how to finetune a language model to generate character backstories using HuggingFace Trainer with wandb integration. We'll use a tiny language model (`TinyStories-33M`) due to resource constraints, but the lessons you learn here should be applicable to large models too!" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "f64e095c-d28b-44aa-a122-4121c5c66a5b", "metadata": {}, "outputs": [], @@ -22,531 +21,173 @@ "from transformers import AutoTokenizer\n", "from datasets import load_dataset\n", "from transformers import AutoModelForCausalLM\n", - "from transformers import Trainer, TrainingArguments" - ] - }, - { - "cell_type": "markdown", - "id": "3fd80268-c4a1-4e1a-aed3-cd5c3ab4d48f", - "metadata": {}, - "source": [ - "Load a dataset from Huggingface" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "a7535b8b-d220-44e8-a56c-97e250c36596", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Found cached dataset parquet (/home/tcapelle/.cache/huggingface/datasets/MohamedRashad___parquet/MohamedRashad--characters_backstories-6398ba4bb1a6e421/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "899f1c4acc1a40d19459e9323bc75960", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/1 [00:00 2048). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (2812 > 2048). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (2573 > 2048). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (2952 > 2048). Running this sequence through the model will result in indexing errors\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Map (num_proc=4): 0%| | 0/465 [00:00 2048). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (4725 > 2048). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (2464 > 2048). Running this sequence through the model will result in indexing errors\n", - "Token indices sequence length is longer than the specified maximum sequence length for this model (3121 > 2048). Running this sequence through the model will result in indexing errors\n" - ] - } - ], - "source": [ - "tokenized_datasets = ds.map(tokenize_function, batched=True, num_proc=4, remove_columns=[\"text\", \"target\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "d59cc8a9-5f87-4eb7-abbc-f4fc18fea51d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[13]" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tokenized_datasets[\"train\"][1][\"input_ids\"][0:10]" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "70bc12ae-52dc-47ad-b9ef-1e5b8af829e8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'.'" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tokenizer.decode(tokenized_datasets[\"train\"][1][\"input_ids\"])" + "# We'll create a tokenizer from model checkpoint\n", + "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)\n", + "\n", + "# We'll need padding to have same length sequences in a batch\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "\n", + "# Define a tokenization function that first concatenates text and target\n", + "def tokenize_function(example):\n", + " merged = example[\"text\"] + \" \" + example[\"target\"]\n", + " batch = tokenizer(merged, padding='max_length', truncation=True, max_length=128)\n", + " batch[\"labels\"] = batch[\"input_ids\"].copy()\n", + " return batch\n", + "\n", + "# Apply it on our dataset, and remove the text columns\n", + "tokenized_datasets = ds.map(tokenize_function, remove_columns=[\"text\", \"target\"])" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "85c6ad00-3825-4f12-be49-8ff336d5d398", + "execution_count": null, + "id": "a42417b8-ffa8-4d96-92ea-d8d949d87d5e", "metadata": {}, "outputs": [], "source": [ - "block_size = 256\n", - "\n", - "def group_texts(examples):\n", - " concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}\n", - " total_length = len(concatenated_examples[list(examples.keys())[0]])\n", - " total_length = (total_length // block_size) * block_size\n", - " # Split by chunks of max_len.\n", - " result = {\n", - " k: [t[i : i + block_size] for i in range(0, total_length, block_size)]\n", - " for k, t in concatenated_examples.items()\n", - " }\n", - " result[\"labels\"] = result[\"input_ids\"].copy()\n", - " return result" + "# Let's check out one prepared example\n", + "print(tokenizer.decode(tokenized_datasets[\"train\"][900]['input_ids']))" ] }, { "cell_type": "markdown", - "id": "26a73750-5e38-4236-a5c3-b356d8041dc3", - "metadata": {}, - "source": [ - "First note that we duplicate the inputs for our labels. This is because the model of the 🤗 Transformers library apply the shifting to the right, so we don't need to do it manually.\n", - "\n", - "Also note that by default, the `map` method will send a batch of 1,000 examples to be treated by the preprocessing function. So here, we will drop the remainder to make the concatenated tokenized texts a multiple of `block_size` every 1,000 examples. You can adjust this behavior by passing a higher batch size (which will also be processed slower). You can also speed-up the preprocessing by using multiprocessing:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "572f29c8-84d3-45b9-b8df-26de8c22bc91", + "id": "2e8d6b17-a63d-41f1-92cf-416064b52156", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Map (num_proc=4): 0%| | 0/1857 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_130647-l0pa7ivo" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Syncing run dulcet-cherry-3 to Weights & Biases (docs)
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View project at https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/tiny-stories-characters/runs/l0pa7ivo" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " \n", - " [816/816 01:26, Epoch 3/3]\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
EpochTraining LossValidation Loss
15.7958005.142995
24.8871005.009582
34.8869004.998519

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "TrainOutput(global_step=816, training_loss=5.0707503650702686, metrics={'train_runtime': 92.5014, 'train_samples_per_second': 70.572, 'train_steps_per_second': 8.821, 'total_flos': 284203589566464.0, 'train_loss': 5.0707503650702686, 'epoch': 3.0})" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ + "# Let's train!\n", "trainer.train()" ] }, @@ -555,12 +196,13 @@ "id": "0a247e01-35d0-454f-8b7e-5f24cdf66f33", "metadata": {}, "source": [ - "## Generate" + "### Generate\n", + "Let's use our trained model to generate some texts with our provided prompts and save them in W&B Table. The model is tiny, replace it with a bigger one to get better results!" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "6f16d43d-445f-4df5-8734-85584f95792f", "metadata": {}, "outputs": [], @@ -571,71 +213,58 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "7911e43f-f4ce-4855-9f68-662438af8d24", "metadata": {}, "outputs": [], "source": [ - "prompt = \"The hero was half human and cat, his strenghts were\"\n", + "prompts = [\n", + " \"Generate Backstory based on following information Character Name: Frogger Character Race: Aarakocra Character Class: Ranger Output: \",\n", + " \"Generate Backstory based on following information Character Name: Smarty Character Race: Aasimar Character Class: Cleric Output: \",\n", + " \"Generate Backstory based on following information Character Name: Volcano Character Race: Android Character Class: Paladin Output: \",\n", + "]\n", + "\n", + "table = wandb.Table(columns=[\"prompt\", \"generation\"])\n", "\n", - "input_ids = tokenizer.encode(prompt, return_tensors=\"pt\").to(device)" + "for prompt in prompts:\n", + " input_ids = tokenizer.encode(prompt, return_tensors=\"pt\").to(device)\n", + " output = model.generate(input_ids, do_sample=True, max_new_tokens=50, top_p=0.3)\n", + " output_text = tokenizer.decode(output[0], skip_special_tokens=True)\n", + " table.add_data(prompt, output_text)\n", + " \n", + "wandb.log({'tiny_generations': table})" ] }, { "cell_type": "code", - "execution_count": 28, - "id": "844802b9-0ffc-466e-bedb-d7b7c6f337de", + "execution_count": null, + "id": "3083c6a3-fdb8-44ab-a028-c0a222a2fdef", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[ 464, 4293, 373, 2063, 1692, 290, 3797, 11, 465, 43071,\n", - " 456, 912, 547]], device='cuda:0')" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "input_ids" + "wandb.finish()" ] }, { "cell_type": "code", - "execution_count": 29, - "id": "e0883650-ab62-49c9-88d8-7f8c4fdfb0a9", + "execution_count": null, + "id": "120d5e88-2460-4716-bcba-077ff4630772", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", - "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The hero was half human and cat, his strenghts were the first to be. He was the only one who had a lot of power, and he was the only one who had a lot of power. He was a great wizard, and he was the only one who could do it. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great wizard, and he was the greatest wizard in the world. He was a great\n" - ] - } - ], - "source": [ - "output = model.generate(input_ids, max_length = 128, num_beams=1)\n", - "output_text = tokenizer.decode(output[0], skip_special_tokens=True)\n", - "\n", - "print(output_text)" - ] + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "976edc1f-418d-47a6-88e2-ca37e3b25366", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -649,7 +278,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.8.0" } }, "nbformat": 4, From 353adc0c5af4eebd43ea3c9fac4f44f3b507e864 Mon Sep 17 00:00:00 2001 From: kldarek Date: Tue, 11 Jul 2023 22:16:31 +0000 Subject: [PATCH 24/43] nb 04 anonymouse --- dlai/04_train_llm.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb index ccb7b550..fcf20b6f 100644 --- a/dlai/04_train_llm.ipynb +++ b/dlai/04_train_llm.ipynb @@ -141,7 +141,7 @@ "outputs": [], "source": [ "# Start a new wandb run\n", - "run = wandb.init(project='dlai-lm-tuning')" + "run = wandb.init(project='dlai-lm-tuning', job_type=\"training\", anonymous=\"allow\")" ] }, { From 909510d999981a96ea818efa86c26495b87758ab Mon Sep 17 00:00:00 2001 From: kldarek Date: Thu, 13 Jul 2023 16:21:17 +0000 Subject: [PATCH 25/43] revision; --- dlai/00_intro.ipynb | 83 ++++--- dlai/01_diffusion_training.ipynb | 159 ++----------- dlai/02_diffusion_sampling.ipynb | 368 +++---------------------------- dlai/03_llm_eval.ipynb | 6 +- dlai/04_train_llm.ipynb | 4 +- dlai/requirements.txt | 211 ++++++++++++++++-- 6 files changed, 282 insertions(+), 549 deletions(-) diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb index ac282235..54b61396 100644 --- a/dlai/00_intro.ipynb +++ b/dlai/00_intro.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "dbfb9335", "metadata": {}, @@ -21,8 +20,8 @@ "import math\n", "from pathlib import Path\n", "from types import SimpleNamespace\n", - "\n", "import wandb\n", + "\n", "from tqdm.auto import tqdm\n", "import torch\n", "import torch.nn as nn\n", @@ -33,27 +32,6 @@ ] }, { - "attachments": {}, - "cell_type": "markdown", - "id": "c4cac7d2", - "metadata": {}, - "source": [ - "### W&B account\n", - "[Sign up](https://wandb.ai/site) for a free account at https://wandb.ai/site and then login to your wandb account to store the results of your experiments and use advanced W&B features. You can also continue to learn in anonymous mode. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "803c37e2-7ff5-46a6-afb7-b80cb69f7501", - "metadata": {}, - "outputs": [], - "source": [ - "# wandb.login() # uncomment if you want to login to wandb" - ] - }, - { - "attachments": {}, "cell_type": "markdown", "id": "2e0bfcc9", "metadata": {}, @@ -117,12 +95,12 @@ "source": [ "def train_model(config):\n", " \"Train a model with a given config\"\n", + " # Start a wandb run\n", " wandb.init(\n", " project=\"dlai-intro\",\n", " config=config,\n", " anonymous=\"allow\",\n", " )\n", - "\n", " # Get the data\n", " train_dl, valid_dl = get_dataloaders(DATA_DIR, \n", " config.batch_size, \n", @@ -157,9 +135,10 @@ " \"train/epoch\": (step + 1 + (n_steps_per_epoch * epoch))/n_steps_per_epoch,\n", " \"train/example_ct\": example_ct\n", " }\n", + " # Log train metrics to wandb\n", " wandb.log(metrics)\n", - "\n", - " # compute validation metrics, log images on last epoch\n", + " \n", + " # Compute validation metrics, log images on last epoch\n", " val_loss, accuracy = validate_model(model, valid_dl, loss_func, \n", " log_images=(epoch == (config.epochs - 1)))\n", "\n", @@ -168,7 +147,8 @@ " \"val/val_loss\": val_loss,\n", " \"val/val_accuracy\": accuracy\n", " }\n", - " wandb.log({**metrics, **val_metrics})\n", + " # Log validation metrics to wandb\n", + " wandb.log(val_metrics)\n", "\n", " wandb.finish()" ] @@ -204,7 +184,6 @@ "\n", " return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n", "\n", - "\n", "def log_image_predictions_table(images, predicted, labels, probs):\n", " \"Create a wandb Table to log images, labels, and predictions\"\n", " columns = [\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)]\n", @@ -213,57 +192,75 @@ " for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n", " table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n", " \n", - " wandb.log({\"predictions_table\": table}, commit=False)" + " wandb.log({\"predictions_table\": table}, commit=False)\n" ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "b3df2485", + "id": "c4cac7d2", "metadata": {}, "source": [ - "### Train model\n", - "Let's train the model with default config and check how it's doing in W&B. " + "### W&B account\n", + "[Sign up](https://wandb.ai/site) for a free account at https://wandb.ai/site and then login to your wandb account to store the results of your experiments and use advanced W&B features. You can also continue to learn in anonymous mode. " ] }, { "cell_type": "code", "execution_count": null, - "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6", + "id": "803c37e2-7ff5-46a6-afb7-b80cb69f7501", "metadata": {}, "outputs": [], "source": [ - "train_model(config)" + "# wandb.login() # uncomment if you want to login to wandb" ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "e9ecf01d", + "id": "b3df2485", "metadata": {}, "source": [ - "Let's try with other values of learning rate:" + "### Train model\n", + "Let's train the model with default config and check how it's doing in W&B. " ] }, { "cell_type": "code", "execution_count": null, - "id": "4f40520a-66f8-4415-9e36-174dda06aca0", + "id": "9423c964-f7e3-4d3b-8a24-e70f7f4414c6", "metadata": {}, "outputs": [], "source": [ - "config.lr = 1e-4\n", "train_model(config)" ] }, { "cell_type": "code", "execution_count": null, - "id": "09008d54", + "id": "6d8af6b3-fdec-4f46-90b4-28585257e9cd", + "metadata": {}, + "outputs": [], + "source": [ + "config.epochs = 3\n", + "train_model(config)" + ] + }, + { + "cell_type": "markdown", + "id": "e9ecf01d", + "metadata": {}, + "source": [ + "Let's try other values of hyperparameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "593f7b8d-216c-4b51-a389-eaae195e5e74", "metadata": {}, "outputs": [], "source": [ - "config.lr = 1e-3\n", + "config.epochs = 1\n", + "config.lr = 1e-4\n", "train_model(config)" ] }, @@ -292,7 +289,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index 6b038bb3..d01015ed 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "958524a2-cb56-439e-850e-032dd10478f2", "metadata": {}, @@ -16,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "700e687c", "metadata": { "tags": [] @@ -37,16 +36,15 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "b88f9513", "metadata": {}, "outputs": [], "source": [ - "# wandb.login() # uncomment if you want to login to wandb" + "# wandb.login(relogin=True) # uncomment if you want to login to wandb" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "7c0d229a", "metadata": {}, @@ -56,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "3d76c167-7122-4f88-9c9f-5ded96684fa5", "metadata": { "tags": [] @@ -104,7 +102,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "bb43f98f", "metadata": {}, @@ -114,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "a705d0a8", "metadata": { "tags": [] @@ -130,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "6bc9001e", "metadata": { "tags": [] @@ -143,19 +140,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "76c63b85", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sprite shape: (89400, 16, 16, 3)\n", - "labels shape: (89400, 5)\n" - ] - } - ], + "outputs": [], "source": [ "# load dataset and construct optimizer\n", "dataset = CustomDataset.from_np(data_dir/\"sprites_1788_16x16.npy\", data_dir/\"sprite_labels_nc_1788_16x16.npy\")\n", @@ -165,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "eb13689d", "metadata": {}, "outputs": [], @@ -176,7 +164,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "fe8eb277", "metadata": {}, @@ -185,7 +172,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "45d92c52-8a11-450c-bc78-ffa221af2fa3", "metadata": {}, @@ -195,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "8b0f5bed", "metadata": {}, "outputs": [], @@ -211,7 +197,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "16085a65", "metadata": {}, "outputs": [], @@ -239,7 +225,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "d9ed46d7", "metadata": {}, @@ -248,7 +233,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c", "metadata": {}, @@ -258,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "d88afdba", "metadata": { "tags": [] @@ -279,7 +263,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "26765a7e-4ddc-449e-95c3-54c58a564738", "metadata": {}, @@ -291,120 +274,10 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "f5f4af69", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" - ] - }, - { - "data": { - "text/html": [ - "Tracking run with wandb version 0.15.5" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_123702-2bkmjqyt" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Syncing run lemon-galaxy-6 to Weights & Biases (docs)
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View project at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/2bkmjqyt" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "7369da274a8a448e8b4d47071261a2f1", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/32 [00:00 22\u001b[0m \u001b[43mloss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \n\u001b[1;32m 23\u001b[0m optim\u001b[38;5;241m.\u001b[39mstep()\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# we log the relevant metrics to the workspace\u001b[39;00m\n", - "File \u001b[0;32m~/mambaforge/envs/pt2/lib/python3.11/site-packages/torch/_tensor.py:487\u001b[0m, in \u001b[0;36mTensor.backward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 477\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[1;32m 479\u001b[0m Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[1;32m 480\u001b[0m (\u001b[38;5;28mself\u001b[39m,),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 485\u001b[0m inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m 486\u001b[0m )\n\u001b[0;32m--> 487\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/mambaforge/envs/pt2/lib/python3.11/site-packages/torch/autograd/__init__.py:200\u001b[0m, in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 195\u001b[0m retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[1;32m 197\u001b[0m \u001b[38;5;66;03m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[1;32m 198\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[0;32m--> 200\u001b[0m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[1;32m 201\u001b[0m \u001b[43m \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ "# same code as before, added comments on the extra W&B instrumentation lines\n", "# create a wandb run\n", @@ -470,7 +343,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -484,7 +357,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb index 45ed8425..2dd0c663 100644 --- a/dlai/02_diffusion_sampling.ipynb +++ b/dlai/02_diffusion_sampling.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "958524a2-cb56-439e-850e-032dd10478f2", "metadata": {}, @@ -14,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "700e687c", "metadata": { "tags": [] @@ -33,16 +32,15 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "beab0dad-c211-4e3c-ab80-de52788f27e2", + "execution_count": null, + "id": "dcaf7a29-782c-4735-991f-4408f5ec6128", "metadata": {}, "outputs": [], "source": [ - "# wandb.login() # uncomment if you want to login to wandb" + "# wandb.login(relogin=True) # uncomment if you want to login to wandb" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "7c0d229a", "metadata": {}, @@ -52,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "54c3a942", "metadata": { "tags": [] @@ -86,7 +84,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "bb43f98f", "metadata": {}, @@ -96,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "8ab66255", "metadata": {}, "outputs": [], @@ -130,24 +127,15 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "b47633e2", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[34m\u001b[1mwandb\u001b[0m: 1 of 1 files downloaded. \n" - ] - } - ], + "outputs": [], "source": [ "nn_model = load_model(MODEL_ARTIFACT)" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "fe8eb277", "metadata": {}, @@ -156,7 +144,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "45d92c52-8a11-450c-bc78-ffa221af2fa3", "metadata": {}, @@ -166,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "f6f479d1", "metadata": {}, "outputs": [], @@ -180,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "8b0f5bed", "metadata": {}, "outputs": [], @@ -195,7 +182,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "895b2bbc-46cd-4be8-a672-e84ef69bc329", "metadata": {}, @@ -207,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "16085a65", "metadata": {}, "outputs": [], @@ -234,7 +220,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c", "metadata": {}, @@ -244,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "d88afdba", "metadata": { "tags": [] @@ -265,7 +250,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "1cbf9ef8-619a-4052-a138-a88c0f0f8b0b", "metadata": {}, @@ -275,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc", "metadata": {}, "outputs": [], @@ -294,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "5fdfc048-47f0-43b5-983e-da715e1ed562", "metadata": { "tags": [] @@ -322,7 +306,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "aee10774-ff79-4df7-9b2d-1908561c23e5", "metadata": {}, @@ -332,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2", "metadata": {}, "outputs": [], @@ -341,7 +324,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "90b838be-8fa1-4c12-9c4f-e40dfacc08e1", "metadata": {}, @@ -351,24 +333,15 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "89e24210-4885-4559-92e1-db10566ef5ea", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sampling timestep 1\r" - ] - } - ], + "outputs": [], "source": [ "ddpm_samples, _ = sample_ddpm_context(noises, ctx_vector)" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "836584a1-26b5-45b1-98c9-0c45d639c8f9", "metadata": {}, @@ -378,25 +351,17 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "25b07c26-0ac2-428a-8351-34f8b7228074", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sampling timestep 20\r" - ] - } - ], + "outputs": [], "source": [ "ddim_samples, _ = sample_ddim_context(noises, ctx_vector, n=ddim_n)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "af33d3c4", "metadata": {}, "outputs": [], @@ -407,7 +372,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "daea8275-0356-452e-a9f9-2824ef53f1ea", "metadata": {}, @@ -416,7 +380,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "85be303d-0f0b-4df4-8c87-bd1bfb6145a2", "metadata": {}, @@ -426,7 +389,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "481afea1-ae53-4b5b-a3db-1d49be0733a3", "metadata": {}, "outputs": [], @@ -439,7 +402,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "987cee86-2db1-4a2a-9d14-f70c6248ecb9", "metadata": {}, @@ -449,295 +411,15 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "bbc7a2ca-ae05-4462-9ae3-82eb1a6dbc27", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", - "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" - ] - }, - { - "data": { - "text/html": [ - "Tracking run with wandb version 0.15.5" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_135129-c1jaiuwv" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Syncing run rare-thunder-9 to Weights & Biases (docs)
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View project at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Waiting for W&B process to finish... (success)." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run rare-thunder-9 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/c1jaiuwv
Synced 6 W&B file(s), 1 media file(s), 91 artifact file(s) and 1 other file(s)" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Find logs at: ./wandb/run-20230706_135129-c1jaiuwv/logs" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "with wandb.init(project=PROJECT, job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n", " wandb.log({\"samplers_table\":table})" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "a974258a-55fb-43ef-9136-985ec85bc3fc", - "metadata": {}, - "source": [ - "## Mixing classes during sampling" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "68987e12-22d1-4c40-b0d2-b33f6397c34a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sampling timestep 1\r" - ] - } - ], - "source": [ - "ctx = torch.tensor([\n", - " # hero, non-hero, food, spell, side-facing\n", - " [1,0,0,0,0], #human\n", - " [1,0,0.6,0,0], \n", - " [0,0,0.6,0.4,0], \n", - " [1,0,0,0,1], \n", - " [1,1,0,0,0],\n", - " [1,0,0,1,0]\n", - "]).float().to(device)\n", - "\n", - "# let's pass the same noise everytime\n", - "samples = torch.cat([torch.randn(1, 3, height, height)]*6, axis=0).to(device) \n", - "ddpm_samples, _ = sample_ddpm_context(samples, ctx)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "626ef616-dae4-4417-9219-d67ef0794e63", - "metadata": {}, - "outputs": [], - "source": [ - "hero_table = wandb.Table(columns=[\"generation\", \n", - " \"hero\", \n", - " \"non-hero\", \n", - " \"food\", \n", - " \"spell\", \n", - " \"side-facing\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "dbeb2d18-feb9-4452-b368-3f7f03b1715d", - "metadata": {}, - "outputs": [], - "source": [ - "for s, c in zip(ddpm_samples, ctx.cpu().numpy().tolist()):\n", - " hero_table.add_data(wandb.Image(s), *c)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "8e196f2b-4d13-4cc2-a380-2f23530bee1c", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "Tracking run with wandb version 0.15.5" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Run data is saved locally in /home/tcapelle/work/edu/dlai/wandb/run-20230706_135310-u90wajwk" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Syncing run elated-eon-10 to Weights & Biases (docs)
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View project at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run at https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Waiting for W&B process to finish... (success)." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - " View run elated-eon-10 at: https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/u90wajwk
Synced 6 W&B file(s), 1 media file(s), 7 artifact file(s) and 1 other file(s)" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Find logs at: ./wandb/run-20230706_135310-u90wajwk/logs" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "with wandb.init(project=PROJECT, job_type=\"sampling_mix\", config=config):\n", - " wandb.log({\"hero_table\":hero_table})" - ] - }, { "cell_type": "code", "execution_count": null, @@ -749,7 +431,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -763,7 +445,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/dlai/03_llm_eval.ipynb b/dlai/03_llm_eval.ipynb index c0a4aa7c..567bb33a 100644 --- a/dlai/03_llm_eval.ipynb +++ b/dlai/03_llm_eval.ipynb @@ -13,7 +13,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "53c0d4d6-3d2b-45e5-90fa-ba7953496ec2", "metadata": {}, @@ -80,7 +79,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "4e7bcf11", "metadata": {}, @@ -195,7 +193,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "16d6d513-389d-4c67-a942-a922bce6ff1a", "metadata": {}, @@ -363,7 +360,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "1ccc075f-32bf-4451-b7ad-ab2a49cc86b6", "metadata": {}, @@ -573,7 +569,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb index fcf20b6f..5148cb2c 100644 --- a/dlai/04_train_llm.ipynb +++ b/dlai/04_train_llm.ipynb @@ -156,7 +156,7 @@ "training_args = TrainingArguments(\n", " f\"{model_name}-finetuned-characters-backstories\",\n", " report_to=\"wandb\", # we need one line to track experiments in wandb\n", - " num_train_epochs=3,\n", + " num_train_epochs=1,\n", " logging_steps=1,\n", " evaluation_strategy = \"epoch\",\n", " learning_rate=1e-4,\n", @@ -278,7 +278,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.0" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/dlai/requirements.txt b/dlai/requirements.txt index f8014cce..96f321ce 100644 --- a/dlai/requirements.txt +++ b/dlai/requirements.txt @@ -1,13 +1,198 @@ -torch>=2.0 -torchvision>=0.15 -matplotlib -pandas -numpy -wandb -tqdm -openai -tenacity -rich -transformers -datasets -accelerate \ No newline at end of file +accelerate==0.21.0 +aiohttp==3.8.4 +aiosignal==1.3.1 +anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1688651106312/work/dist +appdirs==1.4.4 +argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1640817743617/work +argon2-cffi-bindings @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi-bindings_1649500321618/work +asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1670263926556/work +async-lru @ file:///home/conda/feedstock_root/build_artifacts/async-lru_1688997201545/work +async-timeout==4.0.2 +attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1683424013410/work +Babel @ file:///home/conda/feedstock_root/build_artifacts/babel_1677767029043/work +backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work +backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1687772187254/work +beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1680888073205/work +bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1674535352125/work +Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1648883617327/work +build==0.10.0 +CacheControl==0.12.14 +certifi==2023.5.7 +cffi @ file:///tmp/abs_98z5h56wf8/croots/recipe/cffi_1659598650955/work +charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1688813409104/work +cleo==2.0.1 +click==8.1.5 +cmake==3.26.4 +contourpy==1.1.0 +crashtest==0.4.1 +cryptography==41.0.2 +cycler==0.11.0 +dataclasses-json==0.5.9 +datasets==2.13.1 +decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work +defusedxml @ file:///home/conda/feedstock_root/build_artifacts/defusedxml_1615232257335/work +dill==0.3.6 +distlib==0.3.6 +docker-pycreds==0.4.0 +dulwich==0.21.5 +entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work +exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1688381075899/work +executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1667317341051/work +fastjsonschema @ file:///home/conda/feedstock_root/build_artifacts/python-fastjsonschema_1684761244589/work/dist +filelock==3.12.2 +flit_core @ file:///home/conda/feedstock_root/build_artifacts/flit-core_1684084314667/work/source/flit_core +fonttools==4.41.0 +frozenlist==1.4.0 +fsspec==2023.6.0 +gitdb==4.0.10 +GitPython==3.1.32 +greenlet==2.0.2 +html5lib==1.1 +huggingface-hub==0.16.4 +idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1663625384323/work +importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1688754491823/work +importlib-resources @ file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1689017639396/work +installer==0.7.0 +ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1620912942381/work/dist/ipykernel-5.5.5-py3-none-any.whl +ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1685727741709/work +ipython-genutils==0.2.0 +ipywidgets==8.0.7 +jaraco.classes==3.3.0 +jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1669134318875/work +jeepney==0.8.0 +Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1654302431367/work +json5 @ file:///home/conda/feedstock_root/build_artifacts/json5_1688248289187/work +jsonschema @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-meta_1669810440410/work +jupyter-events @ file:///home/conda/feedstock_root/build_artifacts/jupyter_events_1673559782596/work +jupyter-lsp @ file:///home/conda/feedstock_root/build_artifacts/jupyter-lsp-meta_1685453365113/work/jupyter-lsp +jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1687700988094/work +jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1686775603087/work +jupyter_server @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_1687869799272/work +jupyter_server_terminals @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_terminals_1673491454549/work +jupyterlab @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_1689253413907/work +jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1649936611996/work +jupyterlab-widgets==3.0.8 +jupyterlab_server @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_server_1686659921555/work +keyring==23.13.1 +kiwisolver==1.4.4 +langchain==0.0.232 +langsmith==0.0.5 +lit==16.0.6 +lockfile==0.12.2 +markdown-it-py==3.0.0 +MarkupSafe @ file:///opt/conda/conda-bld/markupsafe_1654597864307/work +marshmallow==3.19.0 +marshmallow-enum==1.5.1 +matplotlib==3.7.2 +matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work +mdurl==0.1.2 +mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1686313613819/work/dist +more-itertools==9.1.0 +mpmath==1.3.0 +msgpack==1.0.5 +multidict==6.0.4 +multiprocess==0.70.14 +mypy-extensions==1.0.0 +nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1684790896106/work +nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert-meta_1687202153002/work +nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1688996247388/work +networkx==3.1 +notebook_shim @ file:///home/conda/feedstock_root/build_artifacts/notebook-shim_1682360583588/work +numexpr==2.8.4 +numpy==1.25.1 +nvidia-cublas-cu11==11.10.3.66 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +nvidia-cuda-runtime-cu11==11.7.99 +nvidia-cudnn-cu11==8.5.0.96 +nvidia-cufft-cu11==10.9.0.58 +nvidia-curand-cu11==10.2.10.91 +nvidia-cusolver-cu11==11.4.0.1 +nvidia-cusparse-cu11==11.7.4.91 +nvidia-nccl-cu11==2.14.3 +nvidia-nvtx-cu11==11.7.91 +openai==0.27.8 +openapi-schema-pydantic==1.2.4 +overrides @ file:///home/conda/feedstock_root/build_artifacts/overrides_1666057828264/work +packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1681337016113/work +pandas==2.0.3 +pandocfilters @ file:///home/conda/feedstock_root/build_artifacts/pandocfilters_1631603243851/work +parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1638334955874/work +pathtools==0.1.2 +pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1667297516076/work +pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602536217715/work +Pillow==10.0.0 +pkginfo==1.9.6 +pkgutil_resolve_name @ file:///home/conda/feedstock_root/build_artifacts/pkgutil-resolve-name_1633981968097/work +platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1688739404342/work +poetry==1.5.1 +poetry-core==1.6.1 +poetry-plugin-export==1.4.0 +prometheus-client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1689032443210/work +prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1688565951714/work +protobuf==4.23.4 +psutil==5.9.5 +ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl +pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work +pyarrow==12.0.1 +pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work +pydantic==1.10.11 +Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1681904169130/work +pyparsing==3.0.9 +pyproject_hooks==1.0.0 +pyrsistent @ file:///tmp/build/80754af9/pyrsistent_1636110951836/work +PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1661604839144/work +python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/python-dateutil_1626286286081/work +python-json-logger @ file:///home/conda/feedstock_root/build_artifacts/python-json-logger_1677079630776/work +pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1680088766131/work +PyYAML @ file:///home/conda/feedstock_root/build_artifacts/pyyaml_1648757097602/work +pyzmq @ file:///croot/pyzmq_1686601365461/work +rapidfuzz==2.15.1 +regex==2023.6.3 +requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1684774241324/work +requests-toolbelt==1.0.0 +rfc3339-validator @ file:///home/conda/feedstock_root/build_artifacts/rfc3339-validator_1638811747357/work +rfc3986-validator @ file:///home/conda/feedstock_root/build_artifacts/rfc3986-validator_1598024191506/work +rich==13.4.2 +safetensors==0.3.1 +SecretStorage==3.3.3 +Send2Trash @ file:///home/conda/feedstock_root/build_artifacts/send2trash_1682601222253/work +sentry-sdk==1.28.1 +setproctitle==1.3.2 +shellingham==1.5.0.post1 +six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work +smmap==5.0.0 +sniffio @ file:///home/conda/feedstock_root/build_artifacts/sniffio_1662051266223/work +soupsieve @ file:///home/conda/feedstock_root/build_artifacts/soupsieve_1658207591808/work +SQLAlchemy==2.0.18 +stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work +sympy==1.12 +tenacity==8.2.2 +terminado @ file:///home/conda/feedstock_root/build_artifacts/terminado_1670253674810/work +tinycss2 @ file:///home/conda/feedstock_root/build_artifacts/tinycss2_1666100256010/work +tokenizers==0.13.3 +tomli @ file:///home/conda/feedstock_root/build_artifacts/tomli_1644342247877/work +tomlkit==0.11.8 +torch==2.0.1 +torchvision==0.15.2 +tornado @ file:///opt/conda/conda-bld/tornado_1662061693373/work +tqdm==4.65.0 +traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1675110562325/work +transformers==4.30.2 +triton==2.0.0 +trove-classifiers==2023.7.6 +typing-inspect==0.9.0 +typing-utils @ file:///home/conda/feedstock_root/build_artifacts/typing_utils_1622899189314/work +typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1688315532570/work +tzdata==2023.3 +urllib3==1.26.16 +virtualenv==20.23.1 +wandb==0.15.5 +wandb-addons @ file:///home/darek/projects/edu/dlai/wandb-addons +wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1673864653149/work +webencodings==0.5.1 +websocket-client @ file:///home/conda/feedstock_root/build_artifacts/websocket-client_1687789148259/work +widgetsnbextension==4.0.8 +xxhash==3.2.0 +yarl==1.9.2 +zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1689027407711/work From b3af0c9031cea7e79146c230a00ed6ba995c955f Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Mon, 17 Jul 2023 13:50:27 +0200 Subject: [PATCH 26/43] move ddpm out --- dlai/01_diffusion_training.ipynb | 108 ++++++------------------------- dlai/utilities.py | 56 +++++++++++++++- 2 files changed, 73 insertions(+), 91 deletions(-) diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index d01015ed..13111df4 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "958524a2-cb56-439e-850e-032dd10478f2", "metadata": {}, @@ -41,10 +42,11 @@ "metadata": {}, "outputs": [], "source": [ - "# wandb.login(relogin=True) # uncomment if you want to login to wandb" + "wandb.login(anonymous=\"allow\")" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7c0d229a", "metadata": {}, @@ -70,7 +72,7 @@ "beta2 = 0.02\n", "\n", "# network hyperparameters\n", - "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n", + "device = get_device()\n", "n_feat = 64 # 64 hidden dimension feature\n", "n_cfeat = 5 # context vector is of size 5\n", "height = 16 # 16x16 image\n", @@ -84,7 +86,7 @@ "n_epoch = 32\n", "lrate=1e-3\n", "\n", - "# we are storing the parameters in a dictionary to be logged to wandb\n", + "# we are storing the parameters to be logged to wandb\n", "config = SimpleNamespace(\n", " num_samples=num_samples,\n", " timesteps=timesteps,\n", @@ -103,26 +105,22 @@ }, { "cell_type": "markdown", - "id": "bb43f98f", + "id": "9c99dea4", "metadata": {}, "source": [ - "All this is the same as the previous notebook, except for the addition of the context vector size n_cfeat. We will use this to condition the diffusion model on a context vector." + "setup DDPM noise scheduler and sampler (same as in the generative Ai course). \n", + "- perturb_input: Adds noise to the input image at the corresponding timestep on the schedule\n", + "- sample_ddpm_context: Samples from the model using the DDPM sampler, we will use this function during training to sample from the model regularly and see how our training is progressing" ] }, { "cell_type": "code", "execution_count": null, - "id": "a705d0a8", - "metadata": { - "tags": [] - }, + "id": "6c642e1d", + "metadata": {}, "outputs": [], "source": [ - "# construct DDPM noise schedule\n", - "b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1\n", - "a_t = 1 - b_t\n", - "ab_t = torch.cumsum(a_t.log(), dim=0).exp() \n", - "ab_t[0] = 1" + "perturb_input, sample_ddpm_context = setup_ddpm(beta1, beta2, timesteps, device)" ] }, { @@ -152,79 +150,7 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "eb13689d", - "metadata": {}, - "outputs": [], - "source": [ - "# helper function: perturbs an image to a specified noise level\n", - "def perturb_input(x, t, noise):\n", - " return ab_t.sqrt()[t, None, None, None] * x + (1 - ab_t[t, None, None, None]) * noise" - ] - }, - { - "cell_type": "markdown", - "id": "fe8eb277", - "metadata": {}, - "source": [ - "## Sampling" - ] - }, - { - "cell_type": "markdown", - "id": "45d92c52-8a11-450c-bc78-ffa221af2fa3", - "metadata": {}, - "source": [ - "We will need to instrument the sampler to have telemetry on the generated images while training!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8b0f5bed", - "metadata": {}, - "outputs": [], - "source": [ - "# helper function; removes the predicted noise (but adds some noise back in to avoid collapse)\n", - "def denoise_add_noise(x, t, pred_noise, z=None):\n", - " if z is None:\n", - " z = torch.randn_like(x)\n", - " noise = b_t.sqrt()[t] * z\n", - " mean = (x - pred_noise * ((1 - a_t[t]) / (1 - ab_t[t]).sqrt())) / a_t[t].sqrt()\n", - " return mean + noise" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "16085a65", - "metadata": {}, - "outputs": [], - "source": [ - "# sample with context using standard algorithm\n", - "# we make a change to the original algorithm to allow for context explicitely (the noises)\n", - "@torch.no_grad()\n", - "def sample_ddpm_context(samples, context, save_rate=20):\n", - " # array to keep track of generated steps for plotting\n", - " intermediate = [] \n", - " for i in range(timesteps, 0, -1):\n", - " # reshape time tensor\n", - " t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n", - "\n", - " # sample some random noise to inject back in. For i = 1, don't add back in noise\n", - " z = torch.randn_like(samples) if i > 1 else 0\n", - "\n", - " eps = nn_model(samples, t, c=context) # predict noise e_(x_t,t, ctx)\n", - " samples = denoise_add_noise(samples, i, eps, z)\n", - " if i % save_rate==0 or i==timesteps or i<8:\n", - " intermediate.append(samples.detach().cpu().numpy())\n", - "\n", - " intermediate = np.stack(intermediate)\n", - " return samples.clip(-1, 1), intermediate" - ] - }, - { + "attachments": {}, "cell_type": "markdown", "id": "d9ed46d7", "metadata": {}, @@ -233,6 +159,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c", "metadata": {}, @@ -263,6 +190,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "26765a7e-4ddc-449e-95c3-54c58a564738", "metadata": {}, @@ -325,7 +253,7 @@ " wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n", "\n", " # sample the model and log the images to W&B\n", - " samples, _ = sample_ddpm_context(noises, ctx_vector[:num_samples])\n", + " samples, _ = sample_ddpm_context(nn_model, noises, ctx_vector[:num_samples])\n", " wandb.log({\"train_samples\": [wandb.Image(img) for img in samples.split(1)]})\n", "\n", "# finish W&B run\n", @@ -335,7 +263,7 @@ { "cell_type": "code", "execution_count": null, - "id": "78ddaed3-8184-4161-a1d6-5af139b336d0", + "id": "f676315f", "metadata": {}, "outputs": [], "source": [] @@ -357,7 +285,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/dlai/utilities.py b/dlai/utilities.py index a57955f4..d080d5c0 100644 --- a/dlai/utilities.py +++ b/dlai/utilities.py @@ -1,4 +1,4 @@ -import os +import os, sys import random from pathlib import Path @@ -13,6 +13,15 @@ from torchvision.utils import make_grid, save_image +def get_device(): + "Pick GPU if cuda is available, mps if Mac, else CPU" + if torch.cuda.is_available(): + return torch.device("cuda") + elif sys.platform == "darwin" and torch.backends.mps.is_available(): + return torch.device("mps") + else: + return torch.device("cpu") + def _fig_bounds(x): r = x//32 return min(5, max(1,r)) @@ -387,3 +396,48 @@ def get_dataloaders(data_dir, batch_size, slice_size=None, valid_pct=0.2): valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False, num_workers=1) return train_dl, valid_dl + + +## diffusion functions + +def setup_ddpm(beta1, beta2, timesteps, device): + # construct DDPM noise schedule and sampling functions + b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1 + a_t = 1 - b_t + ab_t = torch.cumsum(a_t.log(), dim=0).exp() + ab_t[0] = 1 + + # helper function: perturbs an image to a specified noise level + def perturb_input(x, t, noise): + return ab_t.sqrt()[t, None, None, None] * x + (1 - ab_t[t, None, None, None]) * noise + + # helper function; removes the predicted noise (but adds some noise back in to avoid collapse) + def _denoise_add_noise(x, t, pred_noise, z=None): + if z is None: + z = torch.randn_like(x) + noise = b_t.sqrt()[t] * z + mean = (x - pred_noise * ((1 - a_t[t]) / (1 - ab_t[t]).sqrt())) / a_t[t].sqrt() + return mean + noise + + # sample with context using standard algorithm + # we make a change to the original algorithm to allow for context explicitely (the noises) + @torch.no_grad() + def sample_ddpm_context(nn_model, noises, context, save_rate=20): + # array to keep track of generated steps for plotting + intermediate = [] + for i in range(timesteps, 0, -1): + # reshape time tensor + t = torch.tensor([i / timesteps])[:, None, None, None].to(noises.device) + + # sample some random noise to inject back in. For i = 1, don't add back in noise + z = torch.randn_like(noises) if i > 1 else 0 + + eps = nn_model(noises, t, c=context) # predict noise e_(x_t,t, ctx) + noises = _denoise_add_noise(noises, i, eps, z) + if i % save_rate==0 or i==timesteps or i<8: + intermediate.append(noises.detach().cpu().numpy()) + + intermediate = np.stack(intermediate) + return noises.clip(-1, 1), intermediate + + return perturb_input, sample_ddpm_context \ No newline at end of file From 27be48a9173afcb944e7357ff9a5112288e44929 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Mon, 17 Jul 2023 14:08:43 +0200 Subject: [PATCH 27/43] add tqdm --- dlai/utilities.py | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/dlai/utilities.py b/dlai/utilities.py index d080d5c0..f69a0b9a 100644 --- a/dlai/utilities.py +++ b/dlai/utilities.py @@ -7,6 +7,7 @@ import torch import torch.nn as nn import torchvision.transforms as transforms +from tqdm.auto import tqdm from matplotlib.animation import FuncAnimation, PillowWriter from PIL import Image from torch.utils.data import DataLoader, Dataset @@ -425,7 +426,7 @@ def _denoise_add_noise(x, t, pred_noise, z=None): def sample_ddpm_context(nn_model, noises, context, save_rate=20): # array to keep track of generated steps for plotting intermediate = [] - for i in range(timesteps, 0, -1): + for i in tqdm(range(timesteps, 0, -1), leave=False): # reshape time tensor t = torch.tensor([i / timesteps])[:, None, None, None].to(noises.device) @@ -440,4 +441,42 @@ def sample_ddpm_context(nn_model, noises, context, save_rate=20): intermediate = np.stack(intermediate) return noises.clip(-1, 1), intermediate - return perturb_input, sample_ddpm_context \ No newline at end of file + return perturb_input, sample_ddpm_context + + +def setup_ddim(beta1, beta2, timesteps, device): + # define sampling function for DDIM + b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1 + a_t = 1 - b_t + ab_t = torch.cumsum(a_t.log(), dim=0).exp() + ab_t[0] = 1 + # removes the noise using ddim + def denoise_ddim(x, t, t_prev, pred_noise): + ab = ab_t[t] + ab_prev = ab_t[t_prev] + + x0_pred = ab_prev.sqrt() / ab.sqrt() * (x - (1 - ab).sqrt() * pred_noise) + dir_xt = (1 - ab_prev).sqrt() * pred_noise + + return x0_pred + dir_xt + + # fast sampling algorithm with context + @torch.no_grad() + def sample_ddim_context(nn_model, noises, context, n=25): + # array to keep track of generated steps for plotting + intermediate = [] + step_size = timesteps // n + for i in tqdm(range(timesteps, 0, -step_size), leave=False): + print(f'sampling timestep {i:3d}', end='\r') + + # reshape time tensor + t = torch.tensor([i / timesteps])[:, None, None, None].to(device) + + eps = nn_model(noises, t, c=context) # predict noise e_(x_t,t) + noises = denoise_ddim(noises, i, i - step_size, eps) + intermediate.append(noises.detach().cpu().numpy()) + + intermediate = np.stack(intermediate) + return noises.clip(-1, 1), intermediate + + return sample_ddim_context \ No newline at end of file From deb14d12da86a6a13609067488aa5d40c15e639a Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Mon, 17 Jul 2023 14:12:52 +0200 Subject: [PATCH 28/43] simplify --- dlai/02_diffusion_sampling.ipynb | 132 +++++-------------------------- 1 file changed, 18 insertions(+), 114 deletions(-) diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb index 2dd0c663..93d816ac 100644 --- a/dlai/02_diffusion_sampling.ipynb +++ b/dlai/02_diffusion_sampling.ipynb @@ -37,7 +37,7 @@ "metadata": {}, "outputs": [], "source": [ - "# wandb.login(relogin=True) # uncomment if you want to login to wandb" + "wandb.login(anonymous=\"allow\")" ] }, { @@ -69,7 +69,7 @@ "height = 16\n", "ddim_n = 25\n", "\n", - "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else torch.device('cpu'))\n", + "device = get_device()\n", "\n", "# we are storing the parameters in a dictionary to be logged to wandb\n", "config = SimpleNamespace(\n", @@ -88,7 +88,7 @@ "id": "bb43f98f", "metadata": {}, "source": [ - "We will load the model from a wandb.Artifact and set up the sampling loop." + "In the previous notebook we saved the best model as a wandb Artifact (our way of storing files during runs). We will now load the model from wandb and set up the sampling loop." ] }, { @@ -154,69 +154,11 @@ { "cell_type": "code", "execution_count": null, - "id": "f6f479d1", + "id": "146424d3", "metadata": {}, "outputs": [], "source": [ - "# construct DDPM noise schedule\n", - "b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1\n", - "a_t = 1 - b_t\n", - "ab_t = torch.cumsum(a_t.log(), dim=0).exp() \n", - "ab_t[0] = 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8b0f5bed", - "metadata": {}, - "outputs": [], - "source": [ - "# helper function; removes the predicted noise (but adds some noise back in to avoid collapse)\n", - "def denoise_add_noise(x, t, pred_noise, z=None):\n", - " if z is None:\n", - " z = torch.randn_like(x)\n", - " noise = b_t.sqrt()[t] * z\n", - " mean = (x - pred_noise * ((1 - a_t[t]) / (1 - ab_t[t]).sqrt())) / a_t[t].sqrt()\n", - " return mean + noise" - ] - }, - { - "cell_type": "markdown", - "id": "895b2bbc-46cd-4be8-a672-e84ef69bc329", - "metadata": {}, - "source": [ - "sample with context using standard algorithm\n", - "we make a change to the original algorithm to allow for context \n", - "and pass a fixed noise tensor (samples)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "16085a65", - "metadata": {}, - "outputs": [], - "source": [ - "@torch.no_grad()\n", - "def sample_ddpm_context(samples, context, save_rate=20):\n", - " # array to keep track of generated steps for plotting\n", - " intermediate = [] \n", - " for i in range(timesteps, 0, -1):\n", - " # reshape time tensor\n", - " t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n", - "\n", - " # sample some random noise to inject back in. For i = 1, don't add back in noise\n", - " z = torch.randn_like(samples) if i > 1 else 0\n", - "\n", - " eps = nn_model(samples, t, c=context) # predict noise\n", - " samples = denoise_add_noise(samples, i, eps, z)\n", - " if i % save_rate==0 or i==timesteps or i<8:\n", - " print(f'sampling timestep {i:3d}', end='\\r')\n", - " intermediate.append(samples.detach().cpu().numpy())\n", - "\n", - " intermediate = np.stack(intermediate)\n", - " return samples.clip(-1, 1), intermediate" + "_, sample_ddpm_context = setup_ddpm(beta1, beta2, timesteps, device)" ] }, { @@ -260,49 +202,11 @@ { "cell_type": "code", "execution_count": null, - "id": "e12affd6-0caa-4e00-8499-c5a7495bc7bc", + "id": "9c1a945d", "metadata": {}, "outputs": [], "source": [ - "# define sampling function for DDIM \n", - "# removes the noise using ddim\n", - "def denoise_ddim(x, t, t_prev, pred_noise):\n", - " ab = ab_t[t]\n", - " ab_prev = ab_t[t_prev]\n", - " \n", - " x0_pred = ab_prev.sqrt() / ab.sqrt() * (x - (1 - ab).sqrt() * pred_noise)\n", - " dir_xt = (1 - ab_prev).sqrt() * pred_noise\n", - "\n", - " return x0_pred + dir_xt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5fdfc048-47f0-43b5-983e-da715e1ed562", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# fast sampling algorithm with context\n", - "@torch.no_grad()\n", - "def sample_ddim_context(samples, context, n=25): \n", - " # array to keep track of generated steps for plotting\n", - " intermediate = [] \n", - " step_size = timesteps // n\n", - " for i in range(timesteps, 0, -step_size):\n", - " print(f'sampling timestep {i:3d}', end='\\r')\n", - "\n", - " # reshape time tensor\n", - " t = torch.tensor([i / timesteps])[:, None, None, None].to(device)\n", - "\n", - " eps = nn_model(samples, t, c=context) # predict noise e_(x_t,t)\n", - " samples = denoise_ddim(samples, i, i - step_size, eps)\n", - " intermediate.append(samples.detach().cpu().numpy())\n", - "\n", - " intermediate = np.stack(intermediate)\n", - " return samples.clip(-1, 1), intermediate" + "sample_ddim_context = setup_ddim(beta1, beta2, timesteps, device)" ] }, { @@ -338,7 +242,7 @@ "metadata": {}, "outputs": [], "source": [ - "ddpm_samples, _ = sample_ddpm_context(noises, ctx_vector)" + "ddpm_samples, _ = sample_ddpm_context(nn_model, noises, ctx_vector)" ] }, { @@ -356,7 +260,15 @@ "metadata": {}, "outputs": [], "source": [ - "ddim_samples, _ = sample_ddim_context(noises, ctx_vector, n=ddim_n)" + "ddim_samples, _ = sample_ddim_context(nn_model, noises, ctx_vector, n=ddim_n)" + ] + }, + { + "cell_type": "markdown", + "id": "e2eb0969", + "metadata": {}, + "source": [ + "Let's put the class names as a column on the table" ] }, { @@ -419,14 +331,6 @@ "with wandb.init(project=PROJECT, job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n", " wandb.log({\"samplers_table\":table})" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a21faa2f-f43a-40c3-9041-7d07d73a358e", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -445,7 +349,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.9" } }, "nbformat": 4, From 223a9cc55ddd01c4c63599887253ba9e50c935be Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Mon, 17 Jul 2023 14:13:08 +0200 Subject: [PATCH 29/43] add pbar description --- dlai/utilities.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dlai/utilities.py b/dlai/utilities.py index f69a0b9a..f61af8eb 100644 --- a/dlai/utilities.py +++ b/dlai/utilities.py @@ -426,7 +426,9 @@ def _denoise_add_noise(x, t, pred_noise, z=None): def sample_ddpm_context(nn_model, noises, context, save_rate=20): # array to keep track of generated steps for plotting intermediate = [] - for i in tqdm(range(timesteps, 0, -1), leave=False): + for i in (pbar:=tqdm(range(timesteps, 0, -1), leave=False)): + pbar.set_description(f'sampling timestep {i:3d}') + # reshape time tensor t = torch.tensor([i / timesteps])[:, None, None, None].to(noises.device) @@ -466,8 +468,8 @@ def sample_ddim_context(nn_model, noises, context, n=25): # array to keep track of generated steps for plotting intermediate = [] step_size = timesteps // n - for i in tqdm(range(timesteps, 0, -step_size), leave=False): - print(f'sampling timestep {i:3d}', end='\r') + for i in (pbar:=tqdm(range(timesteps, 0, -step_size), leave=False)): + pbar.set_description(f'sampling timestep {i:3d}') # reshape time tensor t = torch.tensor([i / timesteps])[:, None, None, None].to(device) From 9f6301c43945b85af2cc041a9ff7022c72bdc52f Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Mon, 17 Jul 2023 14:15:05 +0200 Subject: [PATCH 30/43] rename project --- dlai/01_diffusion_training.ipynb | 2 +- dlai/02_diffusion_sampling.ipynb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index 13111df4..920ac728 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -209,7 +209,7 @@ "source": [ "# same code as before, added comments on the extra W&B instrumentation lines\n", "# create a wandb run\n", - "run = wandb.init(project=\"sprite_diffusion\", \n", + "run = wandb.init(project=\"dlai_sprite_diffusion\", \n", " job_type=\"train\", \n", " anonymous=\"allow\", \n", " config=config)\n", diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb index 93d816ac..8f4116c6 100644 --- a/dlai/02_diffusion_sampling.ipynb +++ b/dlai/02_diffusion_sampling.ipynb @@ -58,7 +58,7 @@ "outputs": [], "source": [ "# Wandb Params\n", - "PROJECT = \"sprite_diffusion\"\n", + "PROJECT = \"dlai_sprite_diffusion\"\n", "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:latest\" \n", "\n", "# ddpm sampler hyperparameters\n", From 27b45b91345ef54d96e91e3671246323d704aa80 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Tue, 18 Jul 2023 11:23:56 +0200 Subject: [PATCH 31/43] simplify loading from dir --- dlai/utilities.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/dlai/utilities.py b/dlai/utilities.py index f61af8eb..e280e668 100644 --- a/dlai/utilities.py +++ b/dlai/utilities.py @@ -349,9 +349,11 @@ def __init__(self, sprites, slabels, transform=default_tfms, null_context=False, self.null_context = null_context @classmethod - def from_np(cls, sfilename, lfilename, transform=default_tfms, null_context=False, argmax=False): - sprites = np.load(sfilename) - slabels = np.load(lfilename) + def from_np(cls, + path, + sfilename="sprites_1788_16x16.npy", lfilename="sprite_labels_nc_1788_16x16.npy", transform=default_tfms, null_context=False, argmax=False): + sprites = np.load(Path(path)/sfilename) + slabels = np.load(Path(path)/lfilename) return cls(sprites, slabels, transform, null_context, argmax) # Return the number of images in the dataset @@ -426,7 +428,8 @@ def _denoise_add_noise(x, t, pred_noise, z=None): def sample_ddpm_context(nn_model, noises, context, save_rate=20): # array to keep track of generated steps for plotting intermediate = [] - for i in (pbar:=tqdm(range(timesteps, 0, -1), leave=False)): + pbar = tqdm(range(timesteps, 0, -1), leave=False) + for i in pbar: pbar.set_description(f'sampling timestep {i:3d}') # reshape time tensor @@ -468,7 +471,8 @@ def sample_ddim_context(nn_model, noises, context, n=25): # array to keep track of generated steps for plotting intermediate = [] step_size = timesteps // n - for i in (pbar:=tqdm(range(timesteps, 0, -step_size), leave=False)): + pbar=tqdm(range(timesteps, 0, -step_size), leave=False) + for i in pbar: pbar.set_description(f'sampling timestep {i:3d}') # reshape time tensor From 738d07d202a3c868b73298b6b51cecc3ddb0960c Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Tue, 18 Jul 2023 11:32:48 +0200 Subject: [PATCH 32/43] simplify more, config and formats --- dlai/01_diffusion_training.ipynb | 108 ++++++++++++++----------------- dlai/02_diffusion_sampling.ipynb | 62 ++++++++---------- 2 files changed, 75 insertions(+), 95 deletions(-) diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index 920ac728..8c06d2f8 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "958524a2-cb56-439e-850e-032dd10478f2", "metadata": {}, @@ -46,7 +45,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "7c0d229a", "metadata": {}, @@ -63,44 +61,32 @@ }, "outputs": [], "source": [ - "# hyperparameters\n", - "num_samples = 30\n", - "\n", - "# diffusion hyperparameters\n", - "timesteps = 500\n", - "beta1 = 1e-4\n", - "beta2 = 0.02\n", + "# we are storing the parameters to be logged to wandb\n", + "DATA_DIR = Path('./data/')\n", + "SAVE_DIR = Path('./data/weights/')\n", + "SAVE_DIR.mkdir(exist_ok=True, parents=True)\n", "\n", - "# network hyperparameters\n", - "device = get_device()\n", - "n_feat = 64 # 64 hidden dimension feature\n", - "n_cfeat = 5 # context vector is of size 5\n", - "height = 16 # 16x16 image\n", - "data_dir = Path('./data/')\n", - "save_dir = Path('./data/weights/')\n", - "save_dir.mkdir(exist_ok=True, parents=True)\n", + "config = SimpleNamespace(\n", + " # hyperparameters\n", + " num_samples = 30,\n", "\n", + " # diffusion hyperparameters\n", + " timesteps = 500,\n", + " beta1 = 1e-4,\n", + " beta2 = 0.02,\n", "\n", - "# training hyperparameters\n", - "batch_size = 100\n", - "n_epoch = 32\n", - "lrate=1e-3\n", + " # network hyperparameters\n", + " n_feat = 64, # 64 hidden dimension feature\n", + " n_cfeat = 5, # context vector is of size 5\n", + " height = 16, # 16x16 image\n", + " \n", + " # training hyperparameters\n", + " batch_size = 100,\n", + " n_epoch = 32,\n", + " lrate = 1e-3,\n", + ")\n", "\n", - "# we are storing the parameters to be logged to wandb\n", - "config = SimpleNamespace(\n", - " num_samples=num_samples,\n", - " timesteps=timesteps,\n", - " beta1=beta1,\n", - " beta2=beta2,\n", - " device=device,\n", - " n_feat=n_feat,\n", - " n_cfeat=n_cfeat,\n", - " height=height,\n", - " save_dir=save_dir,\n", - " batch_size=batch_size,\n", - " n_epoch=n_epoch,\n", - " lrate=lrate,\n", - ")" + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"" ] }, { @@ -120,7 +106,10 @@ "metadata": {}, "outputs": [], "source": [ - "perturb_input, sample_ddpm_context = setup_ddpm(beta1, beta2, timesteps, device)" + "perturb_input, sample_ddpm_context = setup_ddpm(config.beta1, \n", + " config.beta2, \n", + " config.timesteps, \n", + " device)" ] }, { @@ -133,7 +122,11 @@ "outputs": [], "source": [ "# construct model\n", - "nn_model = ContextUnet(in_channels=3, n_feat=n_feat, n_cfeat=n_cfeat, height=height).to(device)" + "nn_model = ContextUnet(\n", + " in_channels=3, \n", + " n_feat=config.n_feat, \n", + " n_cfeat=config.n_cfeat, \n", + " height=config.height).to(device)" ] }, { @@ -144,13 +137,15 @@ "outputs": [], "source": [ "# load dataset and construct optimizer\n", - "dataset = CustomDataset.from_np(data_dir/\"sprites_1788_16x16.npy\", data_dir/\"sprite_labels_nc_1788_16x16.npy\")\n", - "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=1)\n", - "optim = torch.optim.Adam(nn_model.parameters(), lr=lrate, eps=1e-5)" + "dataset = CustomDataset.from_np(path=DATA_DIR)\n", + "dataloader = DataLoader(dataset, \n", + " batch_size=config.batch_size, \n", + " shuffle=True, \n", + " num_workers=1)\n", + "optim = torch.optim.Adam(nn_model.parameters(), lr=config.lrate)" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "d9ed46d7", "metadata": {}, @@ -159,7 +154,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c", "metadata": {}, @@ -178,7 +172,7 @@ "source": [ "# Noise vector\n", "# x_T ~ N(0, 1), sample initial noise\n", - "noises = torch.randn(num_samples, 3, height, height).to(device) \n", + "noises = torch.randn(config.num_samples, 3, config.height, config.height).to(device) \n", "\n", "# A fixed context vector to sample from\n", "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n", @@ -190,7 +184,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "26765a7e-4ddc-449e-95c3-54c58a564738", "metadata": {}, @@ -214,10 +207,13 @@ " anonymous=\"allow\", \n", " config=config)\n", "\n", - "for ep in tqdm(range(n_epoch), leave=True, total=n_epoch):\n", + "# we pass the config back from W&B\n", + "config = wandb.config\n", + "\n", + "for ep in tqdm(range(config.n_epoch), leave=True, total=config.n_epoch):\n", " # set into train mode\n", " nn_model.train()\n", - " optim.param_groups[0]['lr'] = lrate*(1-ep/n_epoch)\n", + " optim.param_groups[0]['lr'] = config.lrate*(1-ep/config.n_epoch)\n", " \n", " pbar = tqdm(dataloader, leave=False)\n", " for x, c in pbar: # x: images c: context\n", @@ -227,9 +223,9 @@ " context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(device)\n", " c = c * context_mask.unsqueeze(-1) \n", " noise = torch.randn_like(x)\n", - " t = torch.randint(1, timesteps + 1, (x.shape[0],)).to(device) \n", + " t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(device) \n", " x_pert = perturb_input(x, t, noise) \n", - " pred_noise = nn_model(x_pert, t / timesteps, c=c) \n", + " pred_noise = nn_model(x_pert, t / config.timesteps, c=c) \n", " loss = F.mse_loss(pred_noise, noise)\n", " loss.backward() \n", " optim.step()\n", @@ -242,7 +238,7 @@ " # save model periodically\n", " if ep%4==0 or ep == int(n_epoch-1):\n", " nn_model.eval()\n", - " ckpt_file = save_dir/f\"context_model.pth\"\n", + " ckpt_file = SAVE_DIR/f\"context_model.pth\"\n", " torch.save(nn_model.state_dict(), ckpt_file)\n", "\n", " # save model to wandb as an Artifact\n", @@ -253,25 +249,17 @@ " wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n", "\n", " # sample the model and log the images to W&B\n", - " samples, _ = sample_ddpm_context(nn_model, noises, ctx_vector[:num_samples])\n", + " samples, _ = sample_ddpm_context(nn_model, noises, ctx_vector[:config.num_samples])\n", " wandb.log({\"train_samples\": [wandb.Image(img) for img in samples.split(1)]})\n", "\n", "# finish W&B run\n", "wandb.finish()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f676315f", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb index 8f4116c6..8396b280 100644 --- a/dlai/02_diffusion_sampling.ipynb +++ b/dlai/02_diffusion_sampling.ipynb @@ -58,29 +58,25 @@ "outputs": [], "source": [ "# Wandb Params\n", - "PROJECT = \"dlai_sprite_diffusion\"\n", "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:latest\" \n", "\n", - "# ddpm sampler hyperparameters\n", - "timesteps = 500\n", - "beta1 = 1e-4\n", - "beta2 = 0.02\n", - "num_samples = 30\n", - "height = 16\n", - "ddim_n = 25\n", - "\n", - "device = get_device()\n", - "\n", - "# we are storing the parameters in a dictionary to be logged to wandb\n", "config = SimpleNamespace(\n", - " timesteps=timesteps,\n", - " beta1=beta1,\n", - " beta2=beta2,\n", - " num_samples=num_samples,\n", - " height=height,\n", - " ddim_n=ddim_n,\n", - " device=device,\n", - ")" + " # hyperparameters\n", + " num_samples = 30,\n", + " \n", + " # ddpm sampler hyperparameters\n", + " timesteps = 500,\n", + " beta1 = 1e-4,\n", + " beta2 = 0.02,\n", + " \n", + " # ddim sampler hp\n", + " ddim_n = 25,\n", + " \n", + " # network hyperparameters\n", + " height = 16,\n", + ")\n", + "\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"" ] }, { @@ -158,7 +154,10 @@ "metadata": {}, "outputs": [], "source": [ - "_, sample_ddpm_context = setup_ddpm(beta1, beta2, timesteps, device)" + "_, sample_ddpm_context = setup_ddpm(config.beta1, \n", + " config.beta2, \n", + " config.timesteps, \n", + " device)" ] }, { @@ -180,7 +179,7 @@ "source": [ "# Noise vector\n", "# x_T ~ N(0, 1), sample initial noise\n", - "noises = torch.randn(num_samples, 3, height, height).to(device) \n", + "noises = torch.randn(config.num_samples, 3, config.height, config.height).to(device) \n", "\n", "# A fixed context vector to sample from\n", "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n", @@ -206,7 +205,7 @@ "metadata": {}, "outputs": [], "source": [ - "sample_ddim_context = setup_ddim(beta1, beta2, timesteps, device)" + "sample_ddim_context = setup_ddim(config.beta1, config.beta2, config.timesteps, device)" ] }, { @@ -232,6 +231,7 @@ "id": "90b838be-8fa1-4c12-9c4f-e40dfacc08e1", "metadata": {}, "source": [ + "### Sampling:\n", "let's compute ddpm samples as before" ] }, @@ -260,7 +260,7 @@ "metadata": {}, "outputs": [], "source": [ - "ddim_samples, _ = sample_ddim_context(nn_model, noises, ctx_vector, n=ddim_n)" + "ddim_samples, _ = sample_ddim_context(nn_model, noises, ctx_vector, n=config.ddim_n)" ] }, { @@ -283,14 +283,6 @@ " return [classes[i] for i in ctx_vector.argmax(dim=1)]" ] }, - { - "cell_type": "markdown", - "id": "daea8275-0356-452e-a9f9-2824ef53f1ea", - "metadata": {}, - "source": [ - "Let's keep track of the sampling params on a dictionary" - ] - }, { "cell_type": "markdown", "id": "85be303d-0f0b-4df4-8c87-bd1bfb6145a2", @@ -307,7 +299,7 @@ "outputs": [], "source": [ "for noise, ddpm_sample, ddim_sample, c in zip(noises, ddpm_samples, ddim_samples, ctx_to_classes(ctx_vector)):\n", - " table.add_data(wandb.Image(noise), \n", + " table.add_data(wandb.Image(noise), # we log the input noise to analyse the impact\n", " wandb.Image(ddpm_sample), \n", " wandb.Image(ddim_sample),\n", " c)" @@ -328,14 +320,14 @@ "metadata": {}, "outputs": [], "source": [ - "with wandb.init(project=PROJECT, job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n", + "with wandb.init(project=\"dlai_sprite_diffusion\", job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n", " wandb.log({\"samplers_table\":table})" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, From 405a0cbe529aa28650551da38b41d5ff9b9fb248 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Tue, 18 Jul 2023 12:06:05 +0200 Subject: [PATCH 33/43] extra tidy up --- dlai/01_diffusion_training.ipynb | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index 8c06d2f8..87302ad5 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -5,11 +5,12 @@ "id": "958524a2-cb56-439e-850e-032dd10478f2", "metadata": {}, "source": [ - "# Training a Diffusion Model with W&B\n", + "# Training a Diffusion Model with Weights and Biases (W&B)\n", "\n", - "In this notebooks we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook and add:\n", - "- Logging of the training loss\n", - "- Sampling from the model during training and logging the samples to W&B\n", + "In this notebooks we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook from the [\"How diffusion models works\"](https://www.deeplearning.ai/short-courses/how-diffusion-models-work/) course. \n", + "We will add:\n", + "- Logging of the training loss and metrics\n", + "- Sampling from the model during training and uploading the samples to W&B\n", "- Saving the model checkpoints to W&B" ] }, @@ -34,6 +35,14 @@ "import wandb" ] }, + { + "cell_type": "markdown", + "id": "8969ab86-bd9b-475d-96e2-b913b42dec14", + "metadata": {}, + "source": [ + "We encourage you to create an account to get the full user experience from W&B" + ] + }, { "cell_type": "code", "execution_count": null, @@ -94,7 +103,7 @@ "id": "9c99dea4", "metadata": {}, "source": [ - "setup DDPM noise scheduler and sampler (same as in the generative Ai course). \n", + "### Setup DDPM noise scheduler and sampler (same as in the Diffusion course). \n", "- perturb_input: Adds noise to the input image at the corresponding timestep on the schedule\n", "- sample_ddpm_context: Samples from the model using the DDPM sampler, we will use this function during training to sample from the model regularly and see how our training is progressing" ] @@ -158,7 +167,7 @@ "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c", "metadata": {}, "source": [ - "we choose a fixed context vector with 6 of each class, this way we know what to expect on the workspace." + "We choose a fixed context vector with 6 samples of each class to guide our diffusion" ] }, { @@ -200,7 +209,6 @@ "metadata": {}, "outputs": [], "source": [ - "# same code as before, added comments on the extra W&B instrumentation lines\n", "# create a wandb run\n", "run = wandb.init(project=\"dlai_sprite_diffusion\", \n", " job_type=\"train\", \n", From d7c70297d66e53576f85e3eabafe2ab1f00d4343 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Tue, 18 Jul 2023 13:47:34 +0200 Subject: [PATCH 34/43] fix loading CustomDataset --- dlai/utilities.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dlai/utilities.py b/dlai/utilities.py index e280e668..f6d8269a 100644 --- a/dlai/utilities.py +++ b/dlai/utilities.py @@ -386,10 +386,7 @@ def split(self, pct=0.2): def get_dataloaders(data_dir, batch_size, slice_size=None, valid_pct=0.2): "Get train/val dataloaders for classification on sprites dataset" - dataset = CustomDataset.from_np(Path(data_dir)/"sprites_1788_16x16.npy", - Path(data_dir)/"sprite_labels_nc_1788_16x16.npy", - argmax=True) - + dataset = CustomDataset.from_np(Path(data_dir), argmax=True) if slice_size: dataset = dataset.subset(slice_size) From c1172ded4dc589d9f7e212bfb0fca0adc19f5511 Mon Sep 17 00:00:00 2001 From: kldarek Date: Tue, 18 Jul 2023 13:53:19 +0200 Subject: [PATCH 35/43] simplification --- dlai/00_intro.ipynb | 54 ++++++++++----------------------------------- 1 file changed, 12 insertions(+), 42 deletions(-) diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb index 54b61396..3e32c75d 100644 --- a/dlai/00_intro.ipynb +++ b/dlai/00_intro.ipynb @@ -20,15 +20,12 @@ "import math\n", "from pathlib import Path\n", "from types import SimpleNamespace\n", - "import wandb\n", - "\n", "from tqdm.auto import tqdm\n", "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "from torch.optim import Adam\n", - "\n", - "from utilities import *" + "from utilities import get_dataloaders" ] }, { @@ -95,12 +92,7 @@ "source": [ "def train_model(config):\n", " \"Train a model with a given config\"\n", - " # Start a wandb run\n", - " wandb.init(\n", - " project=\"dlai-intro\",\n", - " config=config,\n", - " anonymous=\"allow\",\n", - " )\n", + " \n", " # Get the data\n", " train_dl, valid_dl = get_dataloaders(DATA_DIR, \n", " config.batch_size, \n", @@ -132,25 +124,17 @@ " example_ct += len(images)\n", " metrics = {\n", " \"train/train_loss\": train_loss,\n", - " \"train/epoch\": (step + 1 + (n_steps_per_epoch * epoch))/n_steps_per_epoch,\n", + " \"train/epoch\": epoch + 1,\n", " \"train/example_ct\": example_ct\n", " }\n", - " # Log train metrics to wandb\n", - " wandb.log(metrics)\n", " \n", " # Compute validation metrics, log images on last epoch\n", - " val_loss, accuracy = validate_model(model, valid_dl, loss_func, \n", - " log_images=(epoch == (config.epochs - 1)))\n", - "\n", - " # Log train and validation metrics to wandb\n", + " val_loss, accuracy = validate_model(model, valid_dl, loss_func)\n", + " # Compute train and validation metrics\n", " val_metrics = {\n", " \"val/val_loss\": val_loss,\n", " \"val/val_accuracy\": accuracy\n", - " }\n", - " # Log validation metrics to wandb\n", - " wandb.log(val_metrics)\n", - "\n", - " wandb.finish()" + " }\n" ] }, { @@ -160,8 +144,8 @@ "metadata": {}, "outputs": [], "source": [ - "def validate_model(model, valid_dl, loss_func, log_images=False, batch_idx=0):\n", - " \"Compute the performance of the model on the validation dataset and log a wandb.Table\"\n", + "def validate_model(model, valid_dl, loss_func):\n", + " \"Compute the performance of the model on the validation dataset\"\n", " model.eval()\n", " val_loss = 0.0\n", " correct = 0\n", @@ -177,22 +161,8 @@ " # Compute accuracy and accumulate\n", " _, predicted = torch.max(outputs.data, 1)\n", " correct += (predicted == labels).sum().item()\n", - "\n", - " # Log one batch of images to the dashboard, always same batch_idx.\n", - " if i == batch_idx and log_images:\n", - " log_image_predictions_table(images, predicted, labels, outputs.softmax(dim=1))\n", - "\n", - " return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n", - "\n", - "def log_image_predictions_table(images, predicted, labels, probs):\n", - " \"Create a wandb Table to log images, labels, and predictions\"\n", - " columns = [\"image\", \"pred\", \"target\"] + [f\"score_{i}\" for i in range(OUTPUT_SIZE)]\n", - " table = wandb.Table(columns=columns)\n", - " \n", - " for img, pred, targ, prob in zip(images.cpu(), predicted.cpu(), labels.cpu(), probs.cpu()):\n", - " table.add_data(wandb.Image(img), CLASSES[pred], CLASSES[targ], *prob.numpy())\n", - " \n", - " wandb.log({\"predictions_table\": table}, commit=False)\n" + " \n", + " return val_loss / len(valid_dl.dataset), correct / len(valid_dl.dataset)\n" ] }, { @@ -211,7 +181,7 @@ "metadata": {}, "outputs": [], "source": [ - "# wandb.login() # uncomment if you want to login to wandb" + "wandb.login(anonymous=\"allow\")" ] }, { @@ -289,7 +259,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.8.13" } }, "nbformat": 4, From 3df314fd5e125ca46595aebfa74636123e4a7303 Mon Sep 17 00:00:00 2001 From: kldarek Date: Tue, 18 Jul 2023 14:29:30 +0200 Subject: [PATCH 36/43] simplification2 --- dlai/00_intro.ipynb | 51 ++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb index 3e32c75d..3d4b0a8c 100644 --- a/dlai/00_intro.ipynb +++ b/dlai/00_intro.ipynb @@ -25,7 +25,9 @@ "import torch.nn as nn\n", "import torch.nn.functional as F\n", "from torch.optim import Adam\n", - "from utilities import get_dataloaders" + "from utilities import get_dataloaders\n", + "\n", + "import wandb" ] }, { @@ -92,7 +94,11 @@ "source": [ "def train_model(config):\n", " \"Train a model with a given config\"\n", - " \n", + " # Start a wandb run\n", + " wandb.init(\n", + " project=\"dlai-intro\",\n", + " config=config,\n", + " )\n", " # Get the data\n", " train_dl, valid_dl = get_dataloaders(DATA_DIR, \n", " config.batch_size, \n", @@ -127,6 +133,8 @@ " \"train/epoch\": epoch + 1,\n", " \"train/example_ct\": example_ct\n", " }\n", + " # log training metrics to wandb\n", + " wandb.log(metrics)\n", " \n", " # Compute validation metrics, log images on last epoch\n", " val_loss, accuracy = validate_model(model, valid_dl, loss_func)\n", @@ -134,7 +142,11 @@ " val_metrics = {\n", " \"val/val_loss\": val_loss,\n", " \"val/val_accuracy\": accuracy\n", - " }\n" + " }\n", + " # log validation metrics to wandb\n", + " wandb.log(val_metrics)\n", + " \n", + " wandb.finish()\n" ] }, { @@ -206,38 +218,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6d8af6b3-fdec-4f46-90b4-28585257e9cd", - "metadata": {}, - "outputs": [], - "source": [ - "config.epochs = 3\n", - "train_model(config)" - ] - }, - { - "cell_type": "markdown", - "id": "e9ecf01d", - "metadata": {}, - "source": [ - "Let's try other values of hyperparameters:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "593f7b8d-216c-4b51-a389-eaae195e5e74", - "metadata": {}, - "outputs": [], - "source": [ - "config.epochs = 1\n", - "config.lr = 1e-4\n", - "train_model(config)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9d83ea0a", + "id": "2e7c186f", "metadata": {}, "outputs": [], "source": [] From 465b872838a29047ff44a17de78988e5c8d434f4 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Tue, 18 Jul 2023 17:33:07 +0200 Subject: [PATCH 37/43] split into instruct --- dlai/01_diffusion_training-instructor.ipynb | 300 ++++++++++++++++++++ dlai/01_diffusion_training.ipynb | 30 +- dlai/utilities.py | 6 +- 3 files changed, 315 insertions(+), 21 deletions(-) create mode 100644 dlai/01_diffusion_training-instructor.ipynb diff --git a/dlai/01_diffusion_training-instructor.ipynb b/dlai/01_diffusion_training-instructor.ipynb new file mode 100644 index 00000000..9010b2d3 --- /dev/null +++ b/dlai/01_diffusion_training-instructor.ipynb @@ -0,0 +1,300 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "958524a2-cb56-439e-850e-032dd10478f2", + "metadata": {}, + "source": [ + "# Training a Diffusion Model with Weights and Biases (W&B)\n", + "\n", + "In this notebooks we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook from the [\"How diffusion models work\"](https://www.deeplearning.ai/short-courses/how-diffusion-models-work/) course. \n", + "We will add:\n", + "- Logging of the training loss and metrics\n", + "- Sampling from the model during training and uploading the samples to W&B\n", + "- Saving the model checkpoints to W&B" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "700e687c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from types import SimpleNamespace\n", + "from pathlib import Path\n", + "from tqdm.notebook import tqdm\n", + "import torch\n", + "import torch.nn.functional as F\n", + "from torch.utils.data import DataLoader\n", + "import numpy as np\n", + "from utilities import *\n", + "\n", + "import wandb" + ] + }, + { + "cell_type": "markdown", + "id": "8969ab86-bd9b-475d-96e2-b913b42dec14", + "metadata": {}, + "source": [ + "We encourage you to create an account to get the full user experience from W&B" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b88f9513", + "metadata": {}, + "outputs": [], + "source": [ + "wandb.login(anonymous=\"allow\")" + ] + }, + { + "cell_type": "markdown", + "id": "7c0d229a", + "metadata": {}, + "source": [ + "## Setting Things Up" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d76c167-7122-4f88-9c9f-5ded96684fa5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# we are storing the parameters to be logged to wandb\n", + "DATA_DIR = Path('./data/')\n", + "SAVE_DIR = Path('./data/weights/')\n", + "SAVE_DIR.mkdir(exist_ok=True, parents=True)\n", + "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "\n", + "config = SimpleNamespace(\n", + " # hyperparameters\n", + " num_samples = 30,\n", + "\n", + " # diffusion hyperparameters\n", + " timesteps = 500,\n", + " beta1 = 1e-4,\n", + " beta2 = 0.02,\n", + "\n", + " # network hyperparameters\n", + " n_feat = 64, # 64 hidden dimension feature\n", + " n_cfeat = 5, # context vector is of size 5\n", + " height = 16, # 16x16 image\n", + " \n", + " # training hyperparameters\n", + " batch_size = 100,\n", + " n_epoch = 32,\n", + " lrate = 1e-3,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9c99dea4", + "metadata": {}, + "source": [ + "### Setup DDPM noise scheduler and sampler (same as in the Diffusion course). \n", + "- perturb_input: Adds noise to the input image at the corresponding timestep on the schedule\n", + "- sample_ddpm_context: Generate images using the DDPM sampler, we will use this function during training to sample from the model regularly and see how our training is progressing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c642e1d", + "metadata": {}, + "outputs": [], + "source": [ + "# setup ddpm sampler functions\n", + "perturb_input, sample_ddpm_context = setup_ddpm(config.beta1, \n", + " config.beta2, \n", + " config.timesteps, \n", + " DEVICE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bc9001e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# construct model\n", + "nn_model = ContextUnet(\n", + " in_channels=3, \n", + " n_feat=config.n_feat, \n", + " n_cfeat=config.n_cfeat, \n", + " height=config.height).to(DEVICE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76c63b85", + "metadata": {}, + "outputs": [], + "source": [ + "# load dataset and construct optimizer\n", + "dataset = CustomDataset.from_np(path=DATA_DIR)\n", + "dataloader = DataLoader(dataset, \n", + " batch_size=config.batch_size, \n", + " shuffle=True, \n", + " num_workers=1)\n", + "optim = torch.optim.Adam(nn_model.parameters(), lr=config.lrate)" + ] + }, + { + "cell_type": "markdown", + "id": "d9ed46d7", + "metadata": {}, + "source": [ + "## Training" + ] + }, + { + "cell_type": "markdown", + "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c", + "metadata": {}, + "source": [ + "We choose a fixed context vector with 6 samples of each class to guide our diffusion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d88afdba", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Noise vector\n", + "# x_T ~ N(0, 1), sample initial noise\n", + "noises = torch.randn(config.num_samples, 3, \n", + " config.height, config.height).to(DEVICE) \n", + "\n", + "# A fixed context vector to sample from\n", + "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n", + " 1,1,1,1,1,1,\n", + " 2,2,2,2,2,2,\n", + " 3,3,3,3,3,3,\n", + " 4,4,4,4,4,4]), \n", + " 5).to(DEVICE).float()" + ] + }, + { + "cell_type": "markdown", + "id": "26765a7e-4ddc-449e-95c3-54c58a564738", + "metadata": {}, + "source": [ + "The following training cell takes very long to run on CPU, we have already trained the model for you on a GPU equipped machine.\n", + "\n", + "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua) <<" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5f4af69", + "metadata": {}, + "outputs": [], + "source": [ + "# create a wandb run\n", + "run = wandb.init(project=\"dlai_sprite_diffusion\", \n", + " job_type=\"train\", \n", + " anonymous=\"allow\", \n", + " config=config)\n", + "\n", + "# we pass the config back from W&B\n", + "config = wandb.config\n", + "\n", + "for ep in tqdm(range(config.n_epoch), leave=True, total=config.n_epoch):\n", + " # set into train mode\n", + " nn_model.train()\n", + " optim.param_groups[0]['lr'] = config.lrate*(1-ep/config.n_epoch)\n", + " \n", + " pbar = tqdm(dataloader, leave=False)\n", + " for x, c in pbar: # x: images c: context\n", + " optim.zero_grad()\n", + " x = x.to(device)\n", + " c = c.to(x) \n", + " context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(device)\n", + " c = c * context_mask.unsqueeze(-1) \n", + " noise = torch.randn_like(x)\n", + " t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(device) \n", + " x_pert = perturb_input(x, t, noise) \n", + " pred_noise = nn_model(x_pert, t / config.timesteps, c=c) \n", + " loss = F.mse_loss(pred_noise, noise)\n", + " loss.backward() \n", + " optim.step()\n", + "\n", + " # we log the relevant metrics to the workspace\n", + " wandb.log({\"loss\": loss.item(),\n", + " \"lr\": optim.param_groups[0]['lr'],\n", + " \"epoch\": ep})\n", + "\n", + " # save model periodically\n", + " if ep%4==0 or ep == int(n_epoch-1):\n", + " nn_model.eval()\n", + " ckpt_file = SAVE_DIR/f\"context_model.pth\"\n", + " torch.save(nn_model.state_dict(), ckpt_file)\n", + " \n", + " \n", + " ###########################################################\n", + " \n", + " # save model to wandb as an Artifact\n", + " artifact_name = f\"{wandb.run.id}_context_model\"\n", + " at = wandb.Artifact(artifact_name, type=\"model\")\n", + " at.add_file(ckpt_file)\n", + " wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n", + " \n", + " ###########################################################\n", + " \n", + " # sample the model and log the images to W&B\n", + " samples, _ = sample_ddpm_context(nn_model, \n", + " noises, \n", + " ctx_vector[:config.num_samples])\n", + " wandb.log({\n", + " \"train_samples\": [\n", + " wandb.Image(img) for img in samples.split(1)\n", + " ]})\n", + " \n", + " ###########################################################\n", + "# finish W&B run\n", + "wandb.finish()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index 87302ad5..f8e87ae3 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -7,7 +7,7 @@ "source": [ "# Training a Diffusion Model with Weights and Biases (W&B)\n", "\n", - "In this notebooks we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook from the [\"How diffusion models works\"](https://www.deeplearning.ai/short-courses/how-diffusion-models-work/) course. \n", + "In this notebooks we will instrument the training of a diffusion model with W&B. We will use the Lab3 notebook from the [\"How diffusion models work\"](https://www.deeplearning.ai/short-courses/how-diffusion-models-work/) course. \n", "We will add:\n", "- Logging of the training loss and metrics\n", "- Sampling from the model during training and uploading the samples to W&B\n", @@ -74,6 +74,7 @@ "DATA_DIR = Path('./data/')\n", "SAVE_DIR = Path('./data/weights/')\n", "SAVE_DIR.mkdir(exist_ok=True, parents=True)\n", + "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "\n", "config = SimpleNamespace(\n", " # hyperparameters\n", @@ -93,9 +94,7 @@ " batch_size = 100,\n", " n_epoch = 32,\n", " lrate = 1e-3,\n", - ")\n", - "\n", - "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"" + ")" ] }, { @@ -105,7 +104,7 @@ "source": [ "### Setup DDPM noise scheduler and sampler (same as in the Diffusion course). \n", "- perturb_input: Adds noise to the input image at the corresponding timestep on the schedule\n", - "- sample_ddpm_context: Samples from the model using the DDPM sampler, we will use this function during training to sample from the model regularly and see how our training is progressing" + "- sample_ddpm_context: Generate images using the DDPM sampler, we will use this function during training to sample from the model regularly and see how our training is progressing" ] }, { @@ -115,10 +114,11 @@ "metadata": {}, "outputs": [], "source": [ + "# setup ddpm sampler functions\n", "perturb_input, sample_ddpm_context = setup_ddpm(config.beta1, \n", " config.beta2, \n", " config.timesteps, \n", - " device)" + " DEVICE)" ] }, { @@ -135,7 +135,7 @@ " in_channels=3, \n", " n_feat=config.n_feat, \n", " n_cfeat=config.n_cfeat, \n", - " height=config.height).to(device)" + " height=config.height).to(DEVICE)" ] }, { @@ -181,7 +181,8 @@ "source": [ "# Noise vector\n", "# x_T ~ N(0, 1), sample initial noise\n", - "noises = torch.randn(config.num_samples, 3, config.height, config.height).to(device) \n", + "noises = torch.randn(config.num_samples, 3, \n", + " config.height, config.height).to(DEVICE) \n", "\n", "# A fixed context vector to sample from\n", "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n", @@ -189,7 +190,7 @@ " 2,2,2,2,2,2,\n", " 3,3,3,3,3,3,\n", " 4,4,4,4,4,4]), \n", - " 5).to(device=device).float()" + " 5).to(DEVICE).float()" ] }, { @@ -249,17 +250,6 @@ " ckpt_file = SAVE_DIR/f\"context_model.pth\"\n", " torch.save(nn_model.state_dict(), ckpt_file)\n", "\n", - " # save model to wandb as an Artifact\n", - " artifact_name = f\"{wandb.run.id}_context_model\"\n", - " at = wandb.Artifact(artifact_name, type=\"model\", \n", - " metadata={\"loss\":loss.item(), \"epoch\":ep})\n", - " at.add_file(ckpt_file)\n", - " wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n", - "\n", - " # sample the model and log the images to W&B\n", - " samples, _ = sample_ddpm_context(nn_model, noises, ctx_vector[:config.num_samples])\n", - " wandb.log({\"train_samples\": [wandb.Image(img) for img in samples.split(1)]})\n", - "\n", "# finish W&B run\n", "wandb.finish()" ] diff --git a/dlai/utilities.py b/dlai/utilities.py index f6d8269a..a11ea3b8 100644 --- a/dlai/utilities.py +++ b/dlai/utilities.py @@ -482,4 +482,8 @@ def sample_ddim_context(nn_model, noises, context, n=25): intermediate = np.stack(intermediate) return noises.clip(-1, 1), intermediate - return sample_ddim_context \ No newline at end of file + return sample_ddim_context + +def to_classes(ctx_vector): + classes = "hero,non-hero,food,spell,side-facing".split(",") + return [classes[i] for i in ctx_vector.argmax(dim=1)] \ No newline at end of file From 475391378c792ea39dfa75e13de6808249567a2b Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Tue, 18 Jul 2023 17:33:13 +0200 Subject: [PATCH 38/43] cleanup --- dlai/00_intro.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlai/00_intro.ipynb b/dlai/00_intro.ipynb index 3d4b0a8c..632292f2 100644 --- a/dlai/00_intro.ipynb +++ b/dlai/00_intro.ipynb @@ -226,7 +226,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, From d25e2da518fe423906b3dfc4a91db8a1aaf237dc Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Tue, 18 Jul 2023 17:33:23 +0200 Subject: [PATCH 39/43] shorter lines --- dlai/02_diffusion_sampling.ipynb | 75 +++++++++++++++----------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb index 8396b280..a814b291 100644 --- a/dlai/02_diffusion_sampling.ipynb +++ b/dlai/02_diffusion_sampling.ipynb @@ -20,8 +20,8 @@ }, "outputs": [], "source": [ - "from types import SimpleNamespace\n", "from pathlib import Path\n", + "from types import SimpleNamespace\n", "import torch\n", "import torch.nn.functional as F\n", "import numpy as np\n", @@ -58,7 +58,8 @@ "outputs": [], "source": [ "# Wandb Params\n", - "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:latest\" \n", + "MODEL_ARTIFACT = \"deeplearning-ai-temp/model-registry/SpriteGen:staging\" \n", + "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "\n", "config = SimpleNamespace(\n", " # hyperparameters\n", @@ -74,9 +75,7 @@ " \n", " # network hyperparameters\n", " height = 16,\n", - ")\n", - "\n", - "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"" + ")" ] }, { @@ -118,7 +117,7 @@ "\n", " # set the model to eval mode\n", " model.eval()\n", - " return model.to(device)" + " return model.to(DEVICE)" ] }, { @@ -157,7 +156,7 @@ "_, sample_ddpm_context = setup_ddpm(config.beta1, \n", " config.beta2, \n", " config.timesteps, \n", - " device)" + " DEVICE)" ] }, { @@ -179,7 +178,8 @@ "source": [ "# Noise vector\n", "# x_T ~ N(0, 1), sample initial noise\n", - "noises = torch.randn(config.num_samples, 3, config.height, config.height).to(device) \n", + "noises = torch.randn(config.num_samples, 3, \n", + " config.height, config.height).to(DEVICE) \n", "\n", "# A fixed context vector to sample from\n", "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n", @@ -187,7 +187,7 @@ " 2,2,2,2,2,2,\n", " 3,3,3,3,3,3,\n", " 4,4,4,4,4,4]), \n", - " 5).to(device=device).float()" + " 5).to(DEVICE).float()" ] }, { @@ -205,25 +205,10 @@ "metadata": {}, "outputs": [], "source": [ - "sample_ddim_context = setup_ddim(config.beta1, config.beta2, config.timesteps, device)" - ] - }, - { - "cell_type": "markdown", - "id": "aee10774-ff79-4df7-9b2d-1908561c23e5", - "metadata": {}, - "source": [ - "Let's create a `wandb.Table` to store our generations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d7d20ffa-552e-4836-8c98-7655ca92cba2", - "metadata": {}, - "outputs": [], - "source": [ - "table = wandb.Table(columns=[\"input_noise\", \"ddpm\", \"ddim\", \"class\"])" + "sample_ddim_context = setup_ddim(config.beta1, \n", + " config.beta2, \n", + " config.timesteps, \n", + " DEVICE)" ] }, { @@ -260,27 +245,29 @@ "metadata": {}, "outputs": [], "source": [ - "ddim_samples, _ = sample_ddim_context(nn_model, noises, ctx_vector, n=config.ddim_n)" + "ddim_samples, _ = sample_ddim_context(nn_model, \n", + " noises, \n", + " ctx_vector, \n", + " n=config.ddim_n)" ] }, { "cell_type": "markdown", - "id": "e2eb0969", + "id": "5db3cb01", "metadata": {}, "source": [ - "Let's put the class names as a column on the table" + "### Visualizing generations on a Table\n", + "Let's create a `wandb.Table` to store our generations" ] }, { "cell_type": "code", "execution_count": null, - "id": "af33d3c4", + "id": "0f1d3b94", "metadata": {}, "outputs": [], "source": [ - "def ctx_to_classes(ctx_vector):\n", - " classes = \"hero,non-hero,food,spell,side-facing\".split(\",\")\n", - " return [classes[i] for i in ctx_vector.argmax(dim=1)]" + "table = wandb.Table(columns=[\"input_noise\", \"ddpm\", \"ddim\", \"class\"])" ] }, { @@ -298,10 +285,15 @@ "metadata": {}, "outputs": [], "source": [ - "for noise, ddpm_sample, ddim_sample, c in zip(noises, ddpm_samples, ddim_samples, ctx_to_classes(ctx_vector)):\n", - " table.add_data(wandb.Image(noise), # we log the input noise to analyse the impact\n", - " wandb.Image(ddpm_sample), \n", - " wandb.Image(ddim_sample),\n", + "for noise, ddpm_s, ddim_s, c in zip(noises, \n", + " ddpm_samples, \n", + " ddim_samples, \n", + " to_classes(ctx_vector)):\n", + " \n", + " # add data row by row to the Table\n", + " table.add_data(wandb.Image(noise),\n", + " wandb.Image(ddpm_s), \n", + " wandb.Image(ddim_s),\n", " c)" ] }, @@ -320,7 +312,10 @@ "metadata": {}, "outputs": [], "source": [ - "with wandb.init(project=\"dlai_sprite_diffusion\", job_type=\"samplers_battle\", anonymous=\"allow\", config=config):\n", + "with wandb.init(project=\"dlai_sprite_diffusion\", \n", + " job_type=\"samplers_battle\", \n", + " config=config):\n", + " \n", " wandb.log({\"samplers_table\":table})" ] } From 47d2af00aeaaa3a526bd48a9edcdba92f5a7eaa3 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Tue, 18 Jul 2023 19:09:07 +0200 Subject: [PATCH 40/43] split prefix + prompt --- dlai/04_train_llm.ipynb | 371 ++++++++++++++++++++++++++++++++-------- 1 file changed, 304 insertions(+), 67 deletions(-) diff --git a/dlai/04_train_llm.ipynb b/dlai/04_train_llm.ipynb index 5148cb2c..e08cdd66 100644 --- a/dlai/04_train_llm.ipynb +++ b/dlai/04_train_llm.ipynb @@ -3,7 +3,9 @@ { "cell_type": "markdown", "id": "1dfae479-9399-492d-acaa-d9751615ee86", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "# Finetuning a language model\n", "Let's see how to finetune a language model to generate character backstories using HuggingFace Trainer with wandb integration. We'll use a tiny language model (`TinyStories-33M`) due to resource constraints, but the lessons you learn here should be applicable to large models too!" @@ -11,13 +13,11 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "f64e095c-d28b-44aa-a122-4121c5c66a5b", + "execution_count": 1, + "id": "a1f0e67f", "metadata": {}, "outputs": [], "source": [ - "import os\n", - "\n", "from transformers import AutoTokenizer\n", "from datasets import load_dataset\n", "from transformers import AutoModelForCausalLM\n", @@ -28,7 +28,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, + "id": "f79c25e3-5f18-4457-84e1-ed2c0d262222", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mcapecape\u001b[0m (\u001b[33mdeeplearning-ai-temp\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wandb.login(anonymous=\"allow\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "id": "2286ae41-213d-480d-a4ba-8c4e2e1c4771", "metadata": {}, "outputs": [], @@ -46,22 +74,64 @@ "We'll start by loading a dataset containing Dungeons and Dragons character biographies from Huggingface. " ] }, + { + "cell_type": "markdown", + "id": "c9288a8e-b19b-4bd2-a72c-7dda03632282", + "metadata": {}, + "source": [ + "> You can expect to get some warning here, this is ok" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "a7535b8b-d220-44e8-a56c-97e250c36596", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Found cached dataset parquet (/Users/tcapelle/.cache/huggingface/datasets/MohamedRashad___parquet/MohamedRashad--characters_backstories-6398ba4bb1a6e421/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b1d8315d3ae54248840650543b19d386", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/1 [00:00<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>\n" + ] + } + ], "source": [ "# Let's check out one prepared example\n", "print(tokenizer.decode(tokenized_datasets[\"train\"][900]['input_ids']))" @@ -124,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "b4f131eb-979e-40f6-9e28-19756beaa8e4", "metadata": {}, "outputs": [], @@ -135,10 +251,84 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "7345ab23-8d12-4d4c-a39d-bb2202bff218", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "wandb version 0.15.5 is available! To upgrade, please run:\n", + " $ pip install wandb --upgrade" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.15.4" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /Users/tcapelle/work/edu/dlai/wandb/run-20230718_172033-c2lx2628" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run zany-eon-5 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/deeplearning-ai-temp/dlai-lm-tuning" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/deeplearning-ai-temp/dlai-lm-tuning/runs/c2lx2628" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# Start a new wandb run\n", "run = wandb.init(project='dlai-lm-tuning', job_type=\"training\", anonymous=\"allow\")" @@ -146,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "d74ee155-3c30-4ef2-9c4d-fd8ee222c50c", "metadata": {}, "outputs": [], @@ -161,12 +351,13 @@ " evaluation_strategy = \"epoch\",\n", " learning_rate=1e-4,\n", " weight_decay=0.01,\n", + " no_cuda=True, # force cpu use, will be renamed `use_cpu`\n", ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "af62105f-a478-436f-88a2-5c1d78b9d20a", "metadata": {}, "outputs": [], @@ -182,89 +373,135 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "01958a56-c22a-4a27-bc71-41c59fc97f05", - "metadata": {}, - "outputs": [], - "source": [ - "# Let's train!\n", - "trainer.train()" - ] - }, - { - "cell_type": "markdown", - "id": "0a247e01-35d0-454f-8b7e-5f24cdf66f33", + "execution_count": 13, + "id": "816f4c88-bcf2-474a-afbc-b646f89df86c", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "device(type='cpu')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "### Generate\n", - "Let's use our trained model to generate some texts with our provided prompts and save them in W&B Table. The model is tiny, replace it with a bigger one to get better results!" + "trainer.accelerator.device" ] }, { "cell_type": "code", - "execution_count": null, - "id": "6f16d43d-445f-4df5-8734-85584f95792f", - "metadata": {}, - "outputs": [], + "execution_count": 14, + "id": "01958a56-c22a-4a27-bc71-41c59fc97f05", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " [233/233 02:49, Epoch 1/1]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EpochTraining LossValidation Loss
15.3216003.384721

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "TrainOutput(global_step=233, training_loss=3.7527249718940308, metrics={'train_runtime': 170.973, 'train_samples_per_second': 10.861, 'train_steps_per_second': 1.363, 'total_flos': 40423258718208.0, 'train_loss': 3.7527249718940308, 'epoch': 1.0})" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "model = trainer.model\n", - "device = next(model.parameters()).device" + "# Let's train!\n", + "trainer.train()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "7911e43f-f4ce-4855-9f68-662438af8d24", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", + "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n", + "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", + "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n", + "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", + "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" + ] + } + ], "source": [ + "prefix = \"Generate Backstory based on following information Character Name: \"\n", + "\n", "prompts = [\n", - " \"Generate Backstory based on following information Character Name: Frogger Character Race: Aarakocra Character Class: Ranger Output: \",\n", - " \"Generate Backstory based on following information Character Name: Smarty Character Race: Aasimar Character Class: Cleric Output: \",\n", - " \"Generate Backstory based on following information Character Name: Volcano Character Race: Android Character Class: Paladin Output: \",\n", + " \"Frogger Character Race: Aarakocra Character Class: Ranger Output: \",\n", + " \"Smarty Character Race: Aasimar Character Class: Cleric Output: \",\n", + " \"Volcano Character Race: Android Character Class: Paladin Output: \",\n", "]\n", "\n", "table = wandb.Table(columns=[\"prompt\", \"generation\"])\n", "\n", "for prompt in prompts:\n", - " input_ids = tokenizer.encode(prompt, return_tensors=\"pt\").to(device)\n", + " input_ids = tokenizer.encode(prefix + prompt, return_tensors=\"pt\")\n", " output = model.generate(input_ids, do_sample=True, max_new_tokens=50, top_p=0.3)\n", " output_text = tokenizer.decode(output[0], skip_special_tokens=True)\n", - " table.add_data(prompt, output_text)\n", + " table.add_data(prefix + prompt, output_text)\n", " \n", "wandb.log({'tiny_generations': table})" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "3083c6a3-fdb8-44ab-a028-c0a222a2fdef", "metadata": {}, "outputs": [], "source": [ "wandb.finish()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "120d5e88-2460-4716-bcba-077ff4630772", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "976edc1f-418d-47a6-88e2-ca37e3b25366", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -278,7 +515,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.9" } }, "nbformat": 4, From ae4c081b22cc534233cc047e3ba7128668cb5728 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Tue, 18 Jul 2023 17:30:21 +0000 Subject: [PATCH 41/43] trained nb --- dlai/01_diffusion_training-instructor.ipynb | 30 ++++++++++++--------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/dlai/01_diffusion_training-instructor.ipynb b/dlai/01_diffusion_training-instructor.ipynb index 9010b2d3..7b91e8b1 100644 --- a/dlai/01_diffusion_training-instructor.ipynb +++ b/dlai/01_diffusion_training-instructor.ipynb @@ -131,11 +131,10 @@ "outputs": [], "source": [ "# construct model\n", - "nn_model = ContextUnet(\n", - " in_channels=3, \n", - " n_feat=config.n_feat, \n", - " n_cfeat=config.n_cfeat, \n", - " height=config.height).to(DEVICE)" + "nn_model = ContextUnet(in_channels=3, \n", + " n_feat=config.n_feat, \n", + " n_cfeat=config.n_cfeat, \n", + " height=config.height).to(DEVICE)" ] }, { @@ -207,13 +206,14 @@ "cell_type": "code", "execution_count": null, "id": "f5f4af69", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "# create a wandb run\n", "run = wandb.init(project=\"dlai_sprite_diffusion\", \n", " job_type=\"train\", \n", - " anonymous=\"allow\", \n", " config=config)\n", "\n", "# we pass the config back from W&B\n", @@ -227,12 +227,12 @@ " pbar = tqdm(dataloader, leave=False)\n", " for x, c in pbar: # x: images c: context\n", " optim.zero_grad()\n", - " x = x.to(device)\n", - " c = c.to(x) \n", - " context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(device)\n", + " x = x.to(DEVICE)\n", + " c = c.to(DEVICE) \n", + " context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(DEVICE)\n", " c = c * context_mask.unsqueeze(-1) \n", " noise = torch.randn_like(x)\n", - " t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(device) \n", + " t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(DEVICE) \n", " x_pert = perturb_input(x, t, noise) \n", " pred_noise = nn_model(x_pert, t / config.timesteps, c=c) \n", " loss = F.mse_loss(pred_noise, noise)\n", @@ -245,13 +245,14 @@ " \"epoch\": ep})\n", "\n", " # save model periodically\n", - " if ep%4==0 or ep == int(n_epoch-1):\n", + " if ep%4==0 or ep == int(config.n_epoch-1):\n", " nn_model.eval()\n", " ckpt_file = SAVE_DIR/f\"context_model.pth\"\n", " torch.save(nn_model.state_dict(), ckpt_file)\n", " \n", " \n", " ###########################################################\n", + " ### COPY TO DEMO NB #######################################\n", " \n", " # save model to wandb as an Artifact\n", " artifact_name = f\"{wandb.run.id}_context_model\"\n", @@ -260,6 +261,7 @@ " wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n", " \n", " ###########################################################\n", + " ### COPY TO DEMO NB #######################################\n", " \n", " # sample the model and log the images to W&B\n", " samples, _ = sample_ddpm_context(nn_model, \n", @@ -271,6 +273,8 @@ " ]})\n", " \n", " ###########################################################\n", + " ###########################################################\n", + " \n", "# finish W&B run\n", "wandb.finish()" ] @@ -292,7 +296,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.11.3" } }, "nbformat": 4, From d844d5a9310b37b4a7ddbbedc28a7a2c43b0d645 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Tue, 18 Jul 2023 17:34:25 +0000 Subject: [PATCH 42/43] clean up --- dlai/01_diffusion_training-instructor.ipynb | 2 +- dlai/01_diffusion_training.ipynb | 60 ++++++++++----------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/dlai/01_diffusion_training-instructor.ipynb b/dlai/01_diffusion_training-instructor.ipynb index 7b91e8b1..b0874865 100644 --- a/dlai/01_diffusion_training-instructor.ipynb +++ b/dlai/01_diffusion_training-instructor.ipynb @@ -199,7 +199,7 @@ "source": [ "The following training cell takes very long to run on CPU, we have already trained the model for you on a GPU equipped machine.\n", "\n", - "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua) <<" + "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion/runs/gwm91gsw) <<" ] }, { diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index f8e87ae3..4c88cd99 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "958524a2-cb56-439e-850e-032dd10478f2", + "id": "e57c5e2c-04f8-40b7-9b47-e5e05505cb2c", "metadata": {}, "source": [ "# Training a Diffusion Model with Weights and Biases (W&B)\n", @@ -17,7 +17,7 @@ { "cell_type": "code", "execution_count": null, - "id": "700e687c", + "id": "d4a34666-2281-49e3-8574-93d57c72771b", "metadata": { "tags": [] }, @@ -37,7 +37,7 @@ }, { "cell_type": "markdown", - "id": "8969ab86-bd9b-475d-96e2-b913b42dec14", + "id": "2b4dd4a3-b05e-4a7f-811e-a715573761e9", "metadata": {}, "source": [ "We encourage you to create an account to get the full user experience from W&B" @@ -46,7 +46,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b88f9513", + "id": "904d68fe-7435-48a3-b8af-c4be8675311c", "metadata": {}, "outputs": [], "source": [ @@ -55,7 +55,7 @@ }, { "cell_type": "markdown", - "id": "7c0d229a", + "id": "02e2b5b2-82e4-4535-aa98-34ae64a808e8", "metadata": {}, "source": [ "## Setting Things Up" @@ -64,7 +64,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3d76c167-7122-4f88-9c9f-5ded96684fa5", + "id": "4918eda7-6d6b-4f9f-8650-c347ed4a5d1c", "metadata": { "tags": [] }, @@ -99,7 +99,7 @@ }, { "cell_type": "markdown", - "id": "9c99dea4", + "id": "1ed92a7b-b6a3-4c0c-a35d-154ec26ed923", "metadata": {}, "source": [ "### Setup DDPM noise scheduler and sampler (same as in the Diffusion course). \n", @@ -110,7 +110,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6c642e1d", + "id": "5ba81b76-6521-4c7c-80bd-bacde0361a34", "metadata": {}, "outputs": [], "source": [ @@ -124,24 +124,23 @@ { "cell_type": "code", "execution_count": null, - "id": "6bc9001e", + "id": "c83bd768-f709-410a-8062-703bde7997d8", "metadata": { "tags": [] }, "outputs": [], "source": [ "# construct model\n", - "nn_model = ContextUnet(\n", - " in_channels=3, \n", - " n_feat=config.n_feat, \n", - " n_cfeat=config.n_cfeat, \n", - " height=config.height).to(DEVICE)" + "nn_model = ContextUnet(in_channels=3, \n", + " n_feat=config.n_feat, \n", + " n_cfeat=config.n_cfeat, \n", + " height=config.height).to(DEVICE)" ] }, { "cell_type": "code", "execution_count": null, - "id": "76c63b85", + "id": "cf98a114-f7aa-4cbd-b08c-d56ad628da21", "metadata": {}, "outputs": [], "source": [ @@ -156,7 +155,7 @@ }, { "cell_type": "markdown", - "id": "d9ed46d7", + "id": "bdccd6e0-850a-41ed-89e7-db629f838770", "metadata": {}, "source": [ "## Training" @@ -164,7 +163,7 @@ }, { "cell_type": "markdown", - "id": "00b9ef16-1848-476d-a9dd-09175b8f0e3c", + "id": "2338bec6-319c-4603-8ae6-0e1fcbdd3a4e", "metadata": {}, "source": [ "We choose a fixed context vector with 6 samples of each class to guide our diffusion" @@ -173,7 +172,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d88afdba", + "id": "56bfcd32-1a9c-4d0e-8237-77da217f41ae", "metadata": { "tags": [] }, @@ -195,25 +194,26 @@ }, { "cell_type": "markdown", - "id": "26765a7e-4ddc-449e-95c3-54c58a564738", + "id": "e854b7c7-fa0d-4413-8642-f824449d6763", "metadata": {}, "source": [ "The following training cell takes very long to run on CPU, we have already trained the model for you on a GPU equipped machine.\n", "\n", - "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/sprite_diffusion/runs/lqf74fua) <<" + "### You can visit the result of this >> [training here](https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion/runs/gwm91gsw) <<" ] }, { "cell_type": "code", "execution_count": null, - "id": "f5f4af69", - "metadata": {}, + "id": "2c87ca8f-2c09-487f-a8bc-7030c2b76492", + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "# create a wandb run\n", "run = wandb.init(project=\"dlai_sprite_diffusion\", \n", " job_type=\"train\", \n", - " anonymous=\"allow\", \n", " config=config)\n", "\n", "# we pass the config back from W&B\n", @@ -227,12 +227,12 @@ " pbar = tqdm(dataloader, leave=False)\n", " for x, c in pbar: # x: images c: context\n", " optim.zero_grad()\n", - " x = x.to(device)\n", - " c = c.to(x) \n", - " context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(device)\n", + " x = x.to(DEVICE)\n", + " c = c.to(DEVICE) \n", + " context_mask = torch.bernoulli(torch.zeros(c.shape[0]) + 0.8).to(DEVICE)\n", " c = c * context_mask.unsqueeze(-1) \n", " noise = torch.randn_like(x)\n", - " t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(device) \n", + " t = torch.randint(1, config.timesteps + 1, (x.shape[0],)).to(DEVICE) \n", " x_pert = perturb_input(x, t, noise) \n", " pred_noise = nn_model(x_pert, t / config.timesteps, c=c) \n", " loss = F.mse_loss(pred_noise, noise)\n", @@ -245,11 +245,11 @@ " \"epoch\": ep})\n", "\n", " # save model periodically\n", - " if ep%4==0 or ep == int(n_epoch-1):\n", + " if ep%4==0 or ep == int(config.n_epoch-1):\n", " nn_model.eval()\n", " ckpt_file = SAVE_DIR/f\"context_model.pth\"\n", " torch.save(nn_model.state_dict(), ckpt_file)\n", - "\n", + " \n", "# finish W&B run\n", "wandb.finish()" ] @@ -271,7 +271,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.11.3" } }, "nbformat": 4, From f5c51a3e0cfe62c4f6f5f0c18356b9edc7b3a947 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Wed, 19 Jul 2023 15:54:17 +0200 Subject: [PATCH 43/43] add class names as comments --- dlai/01_diffusion_training-instructor.ipynb | 12 +- dlai/01_diffusion_training.ipynb | 29 ++- dlai/02_diffusion_sampling.ipynb | 236 ++++++++++++++++++-- 3 files changed, 241 insertions(+), 36 deletions(-) diff --git a/dlai/01_diffusion_training-instructor.ipynb b/dlai/01_diffusion_training-instructor.ipynb index b0874865..83760618 100644 --- a/dlai/01_diffusion_training-instructor.ipynb +++ b/dlai/01_diffusion_training-instructor.ipynb @@ -184,11 +184,11 @@ " config.height, config.height).to(DEVICE) \n", "\n", "# A fixed context vector to sample from\n", - "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n", - " 1,1,1,1,1,1,\n", - " 2,2,2,2,2,2,\n", - " 3,3,3,3,3,3,\n", - " 4,4,4,4,4,4]), \n", + "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0, # hero\n", + " 1,1,1,1,1,1, # non-hero\n", + " 2,2,2,2,2,2, # food\n", + " 3,3,3,3,3,3, # spell\n", + " 4,4,4,4,4,4]), # side-facing \n", " 5).to(DEVICE).float()" ] }, @@ -296,7 +296,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/dlai/01_diffusion_training.ipynb b/dlai/01_diffusion_training.ipynb index 4c88cd99..b15b820d 100644 --- a/dlai/01_diffusion_training.ipynb +++ b/dlai/01_diffusion_training.ipynb @@ -184,11 +184,11 @@ " config.height, config.height).to(DEVICE) \n", "\n", "# A fixed context vector to sample from\n", - "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n", - " 1,1,1,1,1,1,\n", - " 2,2,2,2,2,2,\n", - " 3,3,3,3,3,3,\n", - " 4,4,4,4,4,4]), \n", + "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0, # hero\n", + " 1,1,1,1,1,1, # non-hero\n", + " 2,2,2,2,2,2, # food\n", + " 3,3,3,3,3,3, # spell\n", + " 4,4,4,4,4,4]), # side-facing \n", " 5).to(DEVICE).float()" ] }, @@ -248,7 +248,22 @@ " if ep%4==0 or ep == int(config.n_epoch-1):\n", " nn_model.eval()\n", " ckpt_file = SAVE_DIR/f\"context_model.pth\"\n", - " torch.save(nn_model.state_dict(), ckpt_file)\n", + " torch.save(nn_model.state_dict(), ckpt_file\n", + " \n", + " # save model to wandb as an Artifact\n", + " artifact_name = f\"{wandb.run.id}_context_model\"\n", + " at = wandb.Artifact(artifact_name, type=\"model\")\n", + " at.add_file(ckpt_file)\n", + " wandb.log_artifact(at, aliases=[f\"epoch_{ep}\"])\n", + " \n", + " # sample the model and log the images to W&B\n", + " samples, _ = sample_ddpm_context(nn_model, \n", + " noises, \n", + " ctx_vector[:config.num_samples])\n", + " wandb.log({\n", + " \"train_samples\": [\n", + " wandb.Image(img) for img in samples.split(1)\n", + " ]})\n", " \n", "# finish W&B run\n", "wandb.finish()" @@ -271,7 +286,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.10.9" } }, "nbformat": 4, diff --git a/dlai/02_diffusion_sampling.ipynb b/dlai/02_diffusion_sampling.ipynb index a814b291..cbc1e181 100644 --- a/dlai/02_diffusion_sampling.ipynb +++ b/dlai/02_diffusion_sampling.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "700e687c", "metadata": { "tags": [] @@ -32,10 +32,44 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "dcaf7a29-782c-4735-991f-4408f5ec6128", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: (1) Private W&B dashboard, no account required\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: (2) Use an existing W&B account\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "wandb: Enter your choice: 1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: You chose 'Private W&B dashboard, no account required'\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /Users/tcapelle/.netrc\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "wandb.login(anonymous=\"allow\")" ] @@ -50,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "54c3a942", "metadata": { "tags": [] @@ -88,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "8ab66255", "metadata": {}, "outputs": [], @@ -122,10 +156,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "b47633e2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: 1 of 1 files downloaded. \n" + ] + } + ], "source": [ "nn_model = load_model(MODEL_ARTIFACT)" ] @@ -148,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "146424d3", "metadata": {}, "outputs": [], @@ -169,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "d88afdba", "metadata": { "tags": [] @@ -182,11 +224,11 @@ " config.height, config.height).to(DEVICE) \n", "\n", "# A fixed context vector to sample from\n", - "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0,\n", - " 1,1,1,1,1,1,\n", - " 2,2,2,2,2,2,\n", - " 3,3,3,3,3,3,\n", - " 4,4,4,4,4,4]), \n", + "ctx_vector = F.one_hot(torch.tensor([0,0,0,0,0,0, # hero\n", + " 1,1,1,1,1,1, # non-hero\n", + " 2,2,2,2,2,2, # food\n", + " 3,3,3,3,3,3, # spell\n", + " 4,4,4,4,4,4]), # side-facing \n", " 5).to(DEVICE).float()" ] }, @@ -200,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "9c1a945d", "metadata": {}, "outputs": [], @@ -222,10 +264,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "89e24210-4885-4559-92e1-db10566ef5ea", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/500 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Tracking run with wandb version 0.15.4" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Run data is saved locally in /Users/tcapelle/work/edu/dlai/wandb/run-20230719_144552-50ekio0x" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Syncing run fresh-plasma-6 to Weights & Biases (docs)
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View project at https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run at https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion/runs/50ekio0x" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Waiting for W&B process to finish... (success)." + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " View run fresh-plasma-6 at: https://wandb.ai/deeplearning-ai-temp/dlai_sprite_diffusion/runs/50ekio0x
Synced 6 W&B file(s), 1 media file(s), 94 artifact file(s) and 1 other file(s)" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Find logs at: ./wandb/run-20230719_144552-50ekio0x/logs" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "with wandb.init(project=\"dlai_sprite_diffusion\", \n", " job_type=\"samplers_battle\", \n", @@ -318,6 +500,14 @@ " \n", " wandb.log({\"samplers_table\":table})" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7df56d25", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {