diff --git a/stable_diffusion.ipynb b/stable_diffusion.ipynb
index f7edf67..63226bd 100644
--- a/stable_diffusion.ipynb
+++ b/stable_diffusion.ipynb
@@ -363,8 +363,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "models--CompVis--stable-diffusion-v1-4\tmodels--google--ddpm-church-256\r\n",
-      "models--google--ddpm-celebahq-256\r\n"
+      "models--CompVis--stable-diffusion-v1-4\tmodels--google--ddpm-church-256\n",
+      "models--google--ddpm-celebahq-256\n"
      ]
     }
    ],
@@ -1323,9 +1323,7 @@
   {
    "cell_type": "code",
    "execution_count": 24,
-   "metadata": {
-    "scrolled": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -1384,7 +1382,11 @@
    "cell_type": "code",
    "execution_count": 4,
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    },
+    "tags": []
    },
    "outputs": [
     {
@@ -1613,8 +1615,8 @@
     }
    ],
    "source": [
-    "db_pipe = StableDiffusionPipeline.from_pretrained(\"pcuenq/jh_dreambooth_1000\", torch_dtype=torch.float16)\n",
-    "db_pipe = db_pipe.to(\"cuda\")"
+    "pipe = StableDiffusionPipeline.from_pretrained(\"pcuenq/jh_dreambooth_1000\", torch_dtype=torch.float16)\n",
+    "pipe = pipe.to(\"cuda\")"
    ]
   },
   {
@@ -1652,7 +1654,7 @@
     "torch.manual_seed(1000)\n",
     "\n",
     "prompt = \"Painting of sks person in the style of Paul Signac\"\n",
-    "images = db_pipe(prompt, num_images_per_prompt=4).images\n",
+    "images = pipe(prompt, num_images_per_prompt=4).images\n",
     "image_grid(images, 1, 4)"
    ]
  },
@@ -1806,6 +1808,15 @@
     "First, we need the text encoder and the tokenizer. These come from the text portion of a standard CLIP model, so we'll use the weights released by Open AI."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "del pipe"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 25,
@@ -1814,8 +1825,8 @@
    "source": [
     "from transformers import CLIPTextModel, CLIPTokenizer\n",
     "\n",
-    "tokenizer = CLIPTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
-    "text_encoder = CLIPTextModel.from_pretrained(\"openai/clip-vit-large-patch14\")"
+    "tokenizer = CLIPTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", torch_dtype=torch.float16)\n",
+    "text_encoder = CLIPTextModel.from_pretrained(\"openai/clip-vit-large-patch14\", torch_dtype=torch.float16)"
    ]
   },
   {
@@ -1833,8 +1844,8 @@
    "source": [
     "from diffusers import AutoencoderKL, UNet2DConditionModel\n",
     "\n",
-    "vae = AutoencoderKL.from_pretrained(\"CompVis/stable-diffusion-v1-4\", subfolder=\"vae\")\n",
-    "unet = UNet2DConditionModel.from_pretrained(\"CompVis/stable-diffusion-v1-4\", subfolder=\"unet\")"
+    "vae = AutoencoderKL.from_pretrained(\"CompVis/stable-diffusion-v1-4\", subfolder=\"vae\", torch_dtype=torch.float16)\n",
+    "unet = UNet2DConditionModel.from_pretrained(\"CompVis/stable-diffusion-v1-4\", subfolder=\"unet\", torch_dtype=torch.float16)"
    ]
   },
   {
@@ -2059,7 +2070,7 @@
     }
    ],
    "source": [
-    "text_embeddings = text_encoder(text_input.input_ids.to(\"cuda\"))[0]\n",
+    "text_embeddings = text_encoder(text_input.input_ids.to(\"cuda\"))[0].half()\n",
     "text_embeddings.shape"
    ]
   },
@@ -2093,7 +2104,7 @@
     "uncond_input = tokenizer(\n",
     "    [\"\"] * batch_size, padding=\"max_length\", max_length=max_length, return_tensors=\"pt\"\n",
     ")\n",
-    "uncond_embeddings = text_encoder(uncond_input.input_ids.to(\"cuda\"))[0]\n",
+    "uncond_embeddings = text_encoder(uncond_input.input_ids.to(\"cuda\"))[0].half()\n",
     "uncond_embeddings.shape"
    ]
   },
@@ -2147,7 +2158,7 @@
    "source": [
     "torch.manual_seed(100)\n",
     "latents = torch.randn((batch_size, unet.in_channels, height // 8, width // 8))\n",
-    "latents = latents.to(\"cuda\")\n",
+    "latents = latents.to(\"cuda\").half()\n",
     "latents.shape"
    ]
   },
@@ -2356,8 +2367,7 @@
      "height": 529
     },
     "id": "AAVZStIokTVv",
-    "outputId": "7af6a1ea-f20a-4445-d756-8bb0dd6a0747",
-    "scrolled": false
+    "outputId": "7af6a1ea-f20a-4445-d756-8bb0dd6a0747"
    },
    "outputs": [
     {
@@ -2405,7 +2415,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.10"
+   "version": "3.10.6"
   },
   "toc": {
    "base_numbering": 1,
@@ -16198,5 +16208,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 1
+ "nbformat_minor": 4
 }