From 358ab2b1921af44981bd7a84733d8e1d91ca836f Mon Sep 17 00:00:00 2001
From: xprabhudayal <128473068+xprabhudayal@users.noreply.github.com>
Date: Wed, 21 Jan 2026 22:28:59 +0530
Subject: [PATCH 1/2] add google colab notebook example for easy inference
---
examples/HeartMuLa_Colab_Example.ipynb | 833 +++++++++++++++++++++++++
1 file changed, 833 insertions(+)
create mode 100644 examples/HeartMuLa_Colab_Example.ipynb
diff --git a/examples/HeartMuLa_Colab_Example.ipynb b/examples/HeartMuLa_Colab_Example.ipynb
new file mode 100644
index 0000000..26f0307
--- /dev/null
+++ b/examples/HeartMuLa_Colab_Example.ipynb
@@ -0,0 +1,833 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KZP-87a0WRpp"
+ },
+ "source": [
+ "# 🎵 HeartMuLa: Music Generation Inference\n",
+ "\n",
+ "This notebook allows you to run **HeartMuLa** (Heart Music Language), a state-of-the-art open-source music generation model.\n",
+ "\n",
+ " GitHub Repository | ArXiv Paper\n",
+ "\n",
+ "### ⚠️ Runtime Requirement\n",
+ "**GPU is required.** Please ensure you are running on a GPU runtime.\n",
+ "* **Colab:** Runtime > Change runtime type > T4 GPU (Standard) or A100 (Premium).\n",
+ "* **Note:** T4 GPUs have limited memory. We use `lazy_load=True` to minimize VRAM usage."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "2PnjQkCYWRpq"
+ },
+ "source": [
+ "## 1. Setup Environment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "okdeSQ0uWRpr",
+ "outputId": "4ba66d27-7379-461c-91bd-e3b776f624ff"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "✅ Installation complete.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Install Dependencies\n",
+ "# @markdown This step clones the repository and installs the required Python packages.\n",
+ "\n",
+ "import os\n",
+ "import sys\n",
+ "from IPython.display import clear_output\n",
+ "\n",
+ "!nvidia-smi\n",
+ "\n",
+ "print(\"Installing dependencies... (this may take 1-2 minutes)\")\n",
+ "\n",
+ "# Clone repo\n",
+ "if not os.path.exists(\"/content/heartlib\"):\n",
+ " %cd /content\n",
+ " !git clone https://github.com/HeartMuLa/heartlib.git\n",
+ "\n",
+ "%cd /content/heartlib\n",
+ "\n",
+ "# Install dependencies\n",
+ "!pip install . --quiet\n",
+ "!pip install huggingface_hub --quiet\n",
+ "\n",
+ "clear_output()\n",
+ "print(\"✅ Installation complete.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5NtsvvuRWRpr"
+ },
+ "source": [
+ "## 2. Download Models\n",
+ "We need to download three components from Hugging Face:\n",
+ "1. **Configuration & Tokenizer**: `HeartMuLa/HeartMuLaGen`\n",
+ "2. **Music Model (3B)**: `HeartMuLa/HeartMuLa-oss-3B`\n",
+ "3. **Audio Codec**: `HeartMuLa/HeartCodec-oss`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "cellView": "form",
+ "id": "2OSp2ezkWRps",
+ "outputId": "e3596a60-c1f6-40f4-c186-0272ad843b90"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "✅ Models downloaded successfully to ./ckpt\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Download Checkpoints\n",
+ "\n",
+ "import os\n",
+ "\n",
+ "# Define paths\n",
+ "ckpt_dir = \"/content/heartlib/ckpt\"\n",
+ "os.makedirs(ckpt_dir, exist_ok=True)\n",
+ "\n",
+ "print(\"Downloading configuration and tokenizer...\")\n",
+ "!huggingface-cli download HeartMuLa/HeartMuLaGen --local-dir {ckpt_dir} --local-dir-use-symlinks False\n",
+ "\n",
+ "print(\"Downloading HeartMuLa-oss-3B model...\")\n",
+ "!huggingface-cli download HeartMuLa/HeartMuLa-oss-3B --local-dir {ckpt_dir}/HeartMuLa-oss-3B --local-dir-use-symlinks False\n",
+ "\n",
+ "print(\"Downloading HeartCodec-oss...\")\n",
+ "!huggingface-cli download HeartMuLa/HeartCodec-oss --local-dir {ckpt_dir}/HeartCodec-oss --local-dir-use-symlinks False\n",
+ "\n",
+ "clear_output()\n",
+ "print(\"✅ Models downloaded successfully to ./ckpt\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "40MGZow3WRps"
+ },
+ "source": [
+ "## 3. Load Model Pipeline\n",
+ "We initialize the pipeline here. We auto-detect if your GPU supports `bfloat16` (Ampere/A100) or if we should fallback to `float16` / `float32` (T4/Pascal)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "LeHTVR5xWRps",
+ "outputId": "8ab95ad7-62ad-46b8-9dfa-51ab1136f7c6"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Using bfloat16 precision (Optimal)\n",
+ "Model components will be loaded to devices as specified:\n",
+ " mula: cuda\n",
+ " codec: cuda\n",
+ "✅ Pipeline loaded successfully!\n"
+ ]
+ }
+ ],
+ "source": [
+ "import torch\n",
+ "from heartlib import HeartMuLaGenPipeline\n",
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "if torch.cuda.is_available() and torch.cuda.is_bf16_supported():\n",
+ " mula_dtype = torch.bfloat16\n",
+ " print(\"Using bfloat16 precision (Optimal)\")\n",
+ "else:\n",
+ " mula_dtype = torch.float16\n",
+ " print(\"Using float16 precision (Fallback for T4/V100)\")\n",
+ "codec_dtype = torch.float32\n",
+ "pipe = HeartMuLaGenPipeline.from_pretrained(\n",
+ " \"./ckpt\",\n",
+ " device={\"mula\": device, \"codec\": device},\n",
+ " dtype={\"mula\": mula_dtype, \"codec\": codec_dtype},\n",
+ " version=\"3B\",\n",
+ " lazy_load=True\n",
+ ")\n",
+ "print(\"✅ Pipeline loaded successfully!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "PF4z9hLaWRps"
+ },
+ "source": [
+ "## 4. Generate Music\n",
+ "\n",
+ "Enter your Lyrics and Tags below.\n",
+ "* **Lyrics**: Structure them with tags like `[Verse]`, `[Chorus]`.\n",
+ "* **Tags**: Comma-separated genres or moods (e.g., `pop, happy, piano`).\n",
+ "* **Parameters**:\n",
+ " * `Max Duration`: Length of the song in seconds (1 min = 60000ms).\n",
+ " * `CFG Scale`: Higher values follow text more closely (1.5 is standard).\n",
+ " * `Temperature`: Creativity (1.0 is standard)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 245,
+ "referenced_widgets": [
+ "a9890e1691ff4c8d89cd5a8e545bed0e",
+ "d7d86758aafb4b64adeebb2e81edc71f",
+ "750d9ec077c44b6a9f04f222cc51a333",
+ "36486be0a6ed4be99f69b7dac85c11f6",
+ "22ae4ffc8f8847898a77c73f4e2cf2ef",
+ "69644cf1574d4842ae5e53bfe713d655",
+ "77d94de8de3c46da85b6b4310b5f8be2",
+ "c5b1d0249fff4d5a8c7a32e674229a6a",
+ "c4014d4bebb64d558ced03b470c38543",
+ "460a689a399f4d9a86cc6060dcfb598d",
+ "138679070de94d4c9663bfe84dbfcf19"
+ ]
+ },
+ "id": "JO8dmoWjWRps",
+ "outputId": "0e122426-407f-44fb-dab4-457d12ddf307"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Generating music... Please wait.\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Loading checkpoint shards: 0%| | 0/4 [00:00, ?it/s]"
+ ],
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "a9890e1691ff4c8d89cd5a8e545bed0e"
+ }
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 375/375 [02:24<00:00, 2.59it/s]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "You have set lazy_load=True. Unloading HeartMuLa from device.\n",
+ "CUDA memory before unloading: 12.67 GB\n",
+ "CUDA memory after unloading: 0.01 GB\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|██████████| 10/10 [00:13<00:00, 1.40s/it]\n",
+ "100%|██████████| 10/10 [00:13<00:00, 1.38s/it]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "You have set lazy_load=True. Unloading HeartCodec from device.\n",
+ "CUDA memory before unloading: 6.20 GB\n",
+ "CUDA memory after unloading: 0.01 GB\n",
+ "✅ Generation complete: ./assets/my_generation.mp3\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title 🎹 Inference Settings\n",
+ "\n",
+ "lyrics = \"\"\"[Intro]\n",
+ "\n",
+ "[Verse]\n",
+ "The city lights are calling\n",
+ "Through the rain that keeps on falling\n",
+ "A neon glow on wet pavement\n",
+ "A silent wish, a time well spent\n",
+ "\n",
+ "[Chorus]\n",
+ "We are running through the night\n",
+ "Chasing shadows, chasing light\n",
+ "In the rhythm of the street\n",
+ "Where the heart and soul both meet\n",
+ "\n",
+ "[Outro]\n",
+ "Fading away...\"\"\"\n",
+ "\n",
+ "tags = \"pop, synthwave, emotional, piano, female vocals\" # @param {type:\"string\"}\n",
+ "\n",
+ "duration_seconds = 30 # @param {type:\"slider\", min:10, max:180, step:10}\n",
+ "guidance_scale = 2.1 # @param {type:\"slider\", min:1.0, max:5.0, step:0.1}\n",
+ "temperature = 1.0 # @param {type:\"slider\", min:0.1, max:2.0, step:0.1}\n",
+ "\n",
+ "# Convert duration to ms\n",
+ "max_audio_length_ms = duration_seconds * 1000\n",
+ "\n",
+ "output_path = \"./assets/my_generation.mp3\"\n",
+ "os.makedirs(\"./assets\", exist_ok=True)\n",
+ "\n",
+ "print(\"Generating music... Please wait.\")\n",
+ "\n",
+ "with torch.no_grad():\n",
+ " pipe(\n",
+ " {\n",
+ " \"lyrics\": lyrics,\n",
+ " \"tags\": tags,\n",
+ " },\n",
+ " max_audio_length_ms=max_audio_length_ms,\n",
+ " save_path=output_path,\n",
+ " topk=50,\n",
+ " temperature=temperature,\n",
+ " cfg_scale=guidance_scale,\n",
+ " )\n",
+ "\n",
+ "print(f\"✅ Generation complete: {output_path}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 75
+ },
+ "id": "MoZxfvQNWRpt",
+ "outputId": "b9829605-ba90-4289-b44c-b4d3944e678d"
+ },
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "# @title 🎧 Listen to the Song\n",
+ "from IPython.display import Audio, display\n",
+ "\n",
+ "if os.path.exists(output_path):\n",
+ " display(Audio(output_path))\n",
+ "else:\n",
+ " print(\"Audio file not found. Please run the generation cell first.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 17
+ },
+ "id": "P8ZgHP9gWRpt",
+ "outputId": "c71680f8-9743-4bc5-cb4f-8b9c1a7b588d"
+ },
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "application/javascript": [
+ "\n",
+ " async function download(id, filename, size) {\n",
+ " if (!google.colab.kernel.accessAllowed) {\n",
+ " return;\n",
+ " }\n",
+ " const div = document.createElement('div');\n",
+ " const label = document.createElement('label');\n",
+ " label.textContent = `Downloading \"${filename}\": `;\n",
+ " div.appendChild(label);\n",
+ " const progress = document.createElement('progress');\n",
+ " progress.max = size;\n",
+ " div.appendChild(progress);\n",
+ " document.body.appendChild(div);\n",
+ "\n",
+ " const buffers = [];\n",
+ " let downloaded = 0;\n",
+ "\n",
+ " const channel = await google.colab.kernel.comms.open(id);\n",
+ " // Send a message to notify the kernel that we're ready.\n",
+ " channel.send({})\n",
+ "\n",
+ " for await (const message of channel.messages) {\n",
+ " // Send a message to notify the kernel that we're ready.\n",
+ " channel.send({})\n",
+ " if (message.buffers) {\n",
+ " for (const buffer of message.buffers) {\n",
+ " buffers.push(buffer);\n",
+ " downloaded += buffer.byteLength;\n",
+ " progress.value = downloaded;\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ " const blob = new Blob(buffers, {type: 'application/binary'});\n",
+ " const a = document.createElement('a');\n",
+ " a.href = window.URL.createObjectURL(blob);\n",
+ " a.download = filename;\n",
+ " div.appendChild(a);\n",
+ " a.click();\n",
+ " div.remove();\n",
+ " }\n",
+ " "
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "application/javascript": [
+ "download(\"download_a80c3e12-7436-4a02-b10a-80fd88f924dd\", \"my_generation.mp3\", 482157)"
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "# @title (Optional) Download MP3\n",
+ "# @markdown Run this to download the generated file to your local computer.\n",
+ "from google.colab import files\n",
+ "if os.path.exists(output_path):\n",
+ " files.download(output_path)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ },
+ "colab": {
+ "provenance": [],
+ "collapsed_sections": [
+ "5NtsvvuRWRpr"
+ ],
+ "gpuType": "T4"
+ },
+ "accelerator": "GPU",
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "a9890e1691ff4c8d89cd5a8e545bed0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HBoxModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_d7d86758aafb4b64adeebb2e81edc71f",
+ "IPY_MODEL_750d9ec077c44b6a9f04f222cc51a333",
+ "IPY_MODEL_36486be0a6ed4be99f69b7dac85c11f6"
+ ],
+ "layout": "IPY_MODEL_22ae4ffc8f8847898a77c73f4e2cf2ef"
+ }
+ },
+ "d7d86758aafb4b64adeebb2e81edc71f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_69644cf1574d4842ae5e53bfe713d655",
+ "placeholder": "β",
+ "style": "IPY_MODEL_77d94de8de3c46da85b6b4310b5f8be2",
+ "value": "Loadingβcheckpointβshards:β100%"
+ }
+ },
+ "750d9ec077c44b6a9f04f222cc51a333": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "FloatProgressModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c5b1d0249fff4d5a8c7a32e674229a6a",
+ "max": 4,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_c4014d4bebb64d558ced03b470c38543",
+ "value": 4
+ }
+ },
+ "36486be0a6ed4be99f69b7dac85c11f6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "HTMLModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_460a689a399f4d9a86cc6060dcfb598d",
+ "placeholder": "β",
+ "style": "IPY_MODEL_138679070de94d4c9663bfe84dbfcf19",
+ "value": "β4/4β[01:04<00:00,β13.86s/it]"
+ }
+ },
+ "22ae4ffc8f8847898a77c73f4e2cf2ef": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "69644cf1574d4842ae5e53bfe713d655": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "77d94de8de3c46da85b6b4310b5f8be2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "c5b1d0249fff4d5a8c7a32e674229a6a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c4014d4bebb64d558ced03b470c38543": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "ProgressStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "460a689a399f4d9a86cc6060dcfb598d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_name": "LayoutModel",
+ "model_module_version": "1.2.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "138679070de94d4c9663bfe84dbfcf19": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_name": "DescriptionStyleModel",
+ "model_module_version": "1.5.0",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
From e456c42cf83dd2058feb6b49512876677162f243 Mon Sep 17 00:00:00 2001
From: xprabhudayal <128473068+xprabhudayal@users.noreply.github.com>
Date: Wed, 21 Jan 2026 23:04:50 +0530
Subject: [PATCH 2/2] fix notebook metadata for github preview
---
examples/HeartMuLa_Colab_Example.ipynb | 1256 +++++++++---------------
1 file changed, 455 insertions(+), 801 deletions(-)
diff --git a/examples/HeartMuLa_Colab_Example.ipynb b/examples/HeartMuLa_Colab_Example.ipynb
index 26f0307..f412c4d 100644
--- a/examples/HeartMuLa_Colab_Example.ipynb
+++ b/examples/HeartMuLa_Colab_Example.ipynb
@@ -1,833 +1,487 @@
{
- "cells": [
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KZP-87a0WRpp"
+ },
+ "source": [
+ "# \ud83c\udfb5 HeartMuLa: Music Generation Inference\n",
+ "\n",
+ "This notebook allows you to run **HeartMuLa** (Heart Music Language), a state-of-the-art open-source music generation model.\n",
+ "\n",
+ " GitHub Repository | ArXiv Paper\n",
+ "\n",
+ "### \u26a0\ufe0f Runtime Requirement\n",
+ "**GPU is required.** Please ensure you are running on a GPU runtime.\n",
+ "* **Colab:** Runtime > Change runtime type > T4 GPU (Standard) or A100 (Premium).\n",
+ "* **Note:** T4 GPUs have limited memory. We use `lazy_load=True` to minimize VRAM usage."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "2PnjQkCYWRpq"
+ },
+ "source": [
+ "## 1. Setup Environment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "okdeSQ0uWRpr",
+ "outputId": "4ba66d27-7379-461c-91bd-e3b776f624ff"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "KZP-87a0WRpp"
- },
- "source": [
- "# 🎵 HeartMuLa: Music Generation Inference\n",
- "\n",
- "This notebook allows you to run **HeartMuLa** (Heart Music Language), a state-of-the-art open-source music generation model.\n",
- "\n",
- " GitHub Repository | ArXiv Paper\n",
- "\n",
- "### ⚠️ Runtime Requirement\n",
- "**GPU is required.** Please ensure you are running on a GPU runtime.\n",
- "* **Colab:** Runtime > Change runtime type > T4 GPU (Standard) or A100 (Premium).\n",
- "* **Note:** T4 GPUs have limited memory. We use `lazy_load=True` to minimize VRAM usage."
- ]
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u2705 Installation complete.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Install Dependencies\n",
+ "# @markdown This step clones the repository and installs the required Python packages.\n",
+ "\n",
+ "import os\n",
+ "import sys\n",
+ "from IPython.display import clear_output\n",
+ "\n",
+ "!nvidia-smi\n",
+ "\n",
+ "print(\"Installing dependencies... (this may take 1-2 minutes)\")\n",
+ "\n",
+ "# Clone repo\n",
+ "if not os.path.exists(\"/content/heartlib\"):\n",
+ " %cd /content\n",
+ " !git clone https://github.com/HeartMuLa/heartlib.git\n",
+ "\n",
+ "%cd /content/heartlib\n",
+ "\n",
+ "# Install dependencies\n",
+ "!pip install . --quiet\n",
+ "!pip install huggingface_hub --quiet\n",
+ "\n",
+ "clear_output()\n",
+ "print(\"\u2705 Installation complete.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5NtsvvuRWRpr"
+ },
+ "source": [
+ "## 2. Download Models\n",
+ "We need to download three components from Hugging Face:\n",
+ "1. **Configuration & Tokenizer**: `HeartMuLa/HeartMuLaGen`\n",
+ "2. **Music Model (3B)**: `HeartMuLa/HeartMuLa-oss-3B`\n",
+ "3. **Audio Codec**: `HeartMuLa/HeartCodec-oss`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "cellView": "form",
+ "id": "2OSp2ezkWRps",
+ "outputId": "e3596a60-c1f6-40f4-c186-0272ad843b90"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "2PnjQkCYWRpq"
- },
- "source": [
- "## 1. Setup Environment"
- ]
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u2705 Models downloaded successfully to ./ckpt\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Download Checkpoints\n",
+ "\n",
+ "import os\n",
+ "\n",
+ "# Define paths\n",
+ "ckpt_dir = \"/content/heartlib/ckpt\"\n",
+ "os.makedirs(ckpt_dir, exist_ok=True)\n",
+ "\n",
+ "print(\"Downloading configuration and tokenizer...\")\n",
+ "!huggingface-cli download HeartMuLa/HeartMuLaGen --local-dir {ckpt_dir} --local-dir-use-symlinks False\n",
+ "\n",
+ "print(\"Downloading HeartMuLa-oss-3B model...\")\n",
+ "!huggingface-cli download HeartMuLa/HeartMuLa-oss-3B --local-dir {ckpt_dir}/HeartMuLa-oss-3B --local-dir-use-symlinks False\n",
+ "\n",
+ "print(\"Downloading HeartCodec-oss...\")\n",
+ "!huggingface-cli download HeartMuLa/HeartCodec-oss --local-dir {ckpt_dir}/HeartCodec-oss --local-dir-use-symlinks False\n",
+ "\n",
+ "clear_output()\n",
+ "print(\"\u2705 Models downloaded successfully to ./ckpt\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "40MGZow3WRps"
+ },
+ "source": [
+ "## 3. Load Model Pipeline\n",
+ "We initialize the pipeline here. We auto-detect whether PyTorch reports `bfloat16` support on your GPU (e.g. Ampere/A100) and otherwise fall back to `float16` (e.g. T4/V100); the audio codec always runs in `float32`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
},
+ "id": "LeHTVR5xWRps",
+ "outputId": "8ab95ad7-62ad-46b8-9dfa-51ab1136f7c6"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "okdeSQ0uWRpr",
- "outputId": "4ba66d27-7379-461c-91bd-e3b776f624ff"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "✅ Installation complete.\n"
- ]
- }
- ],
- "source": [
- "# @title Install Dependencies\n",
- "# @markdown This step clones the repository and installs the required Python packages.\n",
- "\n",
- "import os\n",
- "import sys\n",
- "from IPython.display import clear_output\n",
- "\n",
- "!nvidia-smi\n",
- "\n",
- "print(\"Installing dependencies... (this may take 1-2 minutes)\")\n",
- "\n",
- "# Clone repo\n",
- "if not os.path.exists(\"/content/heartlib\"):\n",
- " %cd /content\n",
- " !git clone https://github.com/HeartMuLa/heartlib.git\n",
- "\n",
- "%cd /content/heartlib\n",
- "\n",
- "# Install dependencies\n",
- "!pip install . --quiet\n",
- "!pip install huggingface_hub --quiet\n",
- "\n",
- "clear_output()\n",
- "print(\"✅ Installation complete.\")"
- ]
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Using bfloat16 precision (Optimal)\n",
+ "Model components will be loaded to devices as specified:\n",
+ " mula: cuda\n",
+ " codec: cuda\n",
+ "\u2705 Pipeline loaded successfully!\n"
+ ]
+ }
+ ],
+ "source": [
+ "import torch\n",
+ "from heartlib import HeartMuLaGenPipeline\n",
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "if torch.cuda.is_available() and torch.cuda.is_bf16_supported():\n",
+ " mula_dtype = torch.bfloat16\n",
+ " print(\"Using bfloat16 precision (Optimal)\")\n",
+ "else:\n",
+ " mula_dtype = torch.float16\n",
+ " print(\"Using float16 precision (Fallback for T4/V100)\")\n",
+ "codec_dtype = torch.float32\n",
+ "pipe = HeartMuLaGenPipeline.from_pretrained(\n",
+ " \"./ckpt\",\n",
+ " device={\"mula\": device, \"codec\": device},\n",
+ " dtype={\"mula\": mula_dtype, \"codec\": codec_dtype},\n",
+ " version=\"3B\",\n",
+ " lazy_load=True\n",
+ ")\n",
+ "print(\"\u2705 Pipeline loaded successfully!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "PF4z9hLaWRps"
+ },
+ "source": [
+ "## 4. Generate Music\n",
+ "\n",
+ "Enter your Lyrics and Tags below.\n",
+ "* **Lyrics**: Structure them with tags like `[Verse]`, `[Chorus]`.\n",
+ "* **Tags**: Comma-separated genres or moods (e.g., `pop, happy, piano`).\n",
+ "* **Parameters**:\n",
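+ "    * `Max Duration`: Maximum length of the song, set in seconds and converted to milliseconds for the pipeline (1 min = 60 s = 60,000 ms).\n",
+ "    * `CFG Scale`: Higher values follow the text more closely (1.5 is standard; this notebook defaults to 2.1).\n",
+ "    * `Temperature`: Creativity (1.0 is standard)."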
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 245,
+ "referenced_widgets": [
+ "a9890e1691ff4c8d89cd5a8e545bed0e",
+ "d7d86758aafb4b64adeebb2e81edc71f",
+ "750d9ec077c44b6a9f04f222cc51a333",
+ "36486be0a6ed4be99f69b7dac85c11f6",
+ "22ae4ffc8f8847898a77c73f4e2cf2ef",
+ "69644cf1574d4842ae5e53bfe713d655",
+ "77d94de8de3c46da85b6b4310b5f8be2",
+ "c5b1d0249fff4d5a8c7a32e674229a6a",
+ "c4014d4bebb64d558ced03b470c38543",
+ "460a689a399f4d9a86cc6060dcfb598d",
+ "138679070de94d4c9663bfe84dbfcf19"
+ ]
},
+ "id": "JO8dmoWjWRps",
+ "outputId": "0e122426-407f-44fb-dab4-457d12ddf307"
+ },
+ "outputs": [
{
- "cell_type": "markdown",
- "metadata": {
- "id": "5NtsvvuRWRpr"
- },
- "source": [
- "## 2. Download Models\n",
- "We need to download three components from Hugging Face:\n",
- "1. **Configuration & Tokenizer**: `HeartMuLa/HeartMuLaGen`\n",
- "2. **Music Model (3B)**: `HeartMuLa/HeartMuLa-oss-3B`\n",
- "3. **Audio Codec**: `HeartMuLa/HeartCodec-oss`"
- ]
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Generating music... Please wait.\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "cellView": "form",
- "id": "2OSp2ezkWRps",
- "outputId": "e3596a60-c1f6-40f4-c186-0272ad843b90"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "✅ Models downloaded successfully to ./ckpt\n"
- ]
- }
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "Loading checkpoint shards: 0%| | 0/4 [00:00, ?it/s]"
],
- "source": [
- "# @title Download Checkpoints\n",
- "\n",
- "import os\n",
- "\n",
- "# Define paths\n",
- "ckpt_dir = \"/content/heartlib/ckpt\"\n",
- "os.makedirs(ckpt_dir, exist_ok=True)\n",
- "\n",
- "print(\"Downloading configuration and tokenizer...\")\n",
- "!huggingface-cli download HeartMuLa/HeartMuLaGen --local-dir {ckpt_dir} --local-dir-use-symlinks False\n",
- "\n",
- "print(\"Downloading HeartMuLa-oss-3B model...\")\n",
- "!huggingface-cli download HeartMuLa/HeartMuLa-oss-3B --local-dir {ckpt_dir}/HeartMuLa-oss-3B --local-dir-use-symlinks False\n",
- "\n",
- "print(\"Downloading HeartCodec-oss...\")\n",
- "!huggingface-cli download HeartMuLa/HeartCodec-oss --local-dir {ckpt_dir}/HeartCodec-oss --local-dir-use-symlinks False\n",
- "\n",
- "clear_output()\n",
- "print(\"✅ Models downloaded successfully to ./ckpt\")"
- ]
+ "application/vnd.jupyter.widget-view+json": {
+ "version_major": 2,
+ "version_minor": 0,
+ "model_id": "a9890e1691ff4c8d89cd5a8e545bed0e"
+ }
+ },
+ "metadata": {}
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "40MGZow3WRps"
- },
- "source": [
- "## 3. Load Model Pipeline\n",
- "We initialize the pipeline here. We auto-detect if your GPU supports `bfloat16` (Ampere/A100) or if we should fallback to `float16` / `float32` (T4/Pascal)."
- ]
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 375/375 [02:24<00:00, 2.59it/s]\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "LeHTVR5xWRps",
- "outputId": "8ab95ad7-62ad-46b8-9dfa-51ab1136f7c6"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Using bfloat16 precision (Optimal)\n",
- "Model components will be loaded to devices as specified:\n",
- " mula: cuda\n",
- " codec: cuda\n",
- "✅ Pipeline loaded successfully!\n"
- ]
- }
- ],
- "source": [
- "import torch\n",
- "from heartlib import HeartMuLaGenPipeline\n",
- "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
- "if torch.cuda.is_available() and torch.cuda.is_bf16_supported():\n",
- " mula_dtype = torch.bfloat16\n",
- " print(\"Using bfloat16 precision (Optimal)\")\n",
- "else:\n",
- " mula_dtype = torch.float16\n",
- " print(\"Using float16 precision (Fallback for T4/V100)\")\n",
- "codec_dtype = torch.float32\n",
- "pipe = HeartMuLaGenPipeline.from_pretrained(\n",
- " \"./ckpt\",\n",
- " device={\"mula\": device, \"codec\": device},\n",
- " dtype={\"mula\": mula_dtype, \"codec\": codec_dtype},\n",
- " version=\"3B\",\n",
- " lazy_load=True\n",
- ")\n",
- "print(\"✅ Pipeline loaded successfully!\")"
- ]
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "You have set lazy_load=True. Unloading HeartMuLa from device.\n",
+ "CUDA memory before unloading: 12.67 GB\n",
+ "CUDA memory after unloading: 0.01 GB\n"
+ ]
},
{
- "cell_type": "markdown",
- "metadata": {
- "id": "PF4z9hLaWRps"
- },
- "source": [
- "## 4. Generate Music\n",
- "\n",
- "Enter your Lyrics and Tags below.\n",
- "* **Lyrics**: Structure them with tags like `[Verse]`, `[Chorus]`.\n",
- "* **Tags**: Comma-separated genres or moods (e.g., `pop, happy, piano`).\n",
- "* **Parameters**:\n",
- " * `Max Duration`: Length of the song in seconds (1 min = 60000ms).\n",
- " * `CFG Scale`: Higher values follow text more closely (1.5 is standard).\n",
- " * `Temperature`: Creativity (1.0 is standard)."
- ]
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 10/10 [00:13<00:00, 1.40s/it]\n",
+ "100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 10/10 [00:13<00:00, 1.38s/it]\n"
+ ]
},
{
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 245,
- "referenced_widgets": [
- "a9890e1691ff4c8d89cd5a8e545bed0e",
- "d7d86758aafb4b64adeebb2e81edc71f",
- "750d9ec077c44b6a9f04f222cc51a333",
- "36486be0a6ed4be99f69b7dac85c11f6",
- "22ae4ffc8f8847898a77c73f4e2cf2ef",
- "69644cf1574d4842ae5e53bfe713d655",
- "77d94de8de3c46da85b6b4310b5f8be2",
- "c5b1d0249fff4d5a8c7a32e674229a6a",
- "c4014d4bebb64d558ced03b470c38543",
- "460a689a399f4d9a86cc6060dcfb598d",
- "138679070de94d4c9663bfe84dbfcf19"
- ]
- },
- "id": "JO8dmoWjWRps",
- "outputId": "0e122426-407f-44fb-dab4-457d12ddf307"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Generating music... Please wait.\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "Loading checkpoint shards: 0%| | 0/4 [00:00, ?it/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "a9890e1691ff4c8d89cd5a8e545bed0e"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "100%|██████████| 375/375 [02:24<00:00, 2.59it/s]\n"
- ]
- },
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "You have set lazy_load=True. Unloading HeartMuLa from device.\n",
- "CUDA memory before unloading: 12.67 GB\n",
- "CUDA memory after unloading: 0.01 GB\n"
- ]
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "100%|██████████| 10/10 [00:13<00:00, 1.40s/it]\n",
- "100%|██████████| 10/10 [00:13<00:00, 1.38s/it]\n"
- ]
- },
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "You have set lazy_load=True. Unloading HeartCodec from device.\n",
- "CUDA memory before unloading: 6.20 GB\n",
- "CUDA memory after unloading: 0.01 GB\n",
- "✅ Generation complete: ./assets/my_generation.mp3\n"
- ]
- }
- ],
- "source": [
- "# @title 🎹 Inference Settings\n",
- "\n",
- "lyrics = \"\"\"[Intro]\n",
- "\n",
- "[Verse]\n",
- "The city lights are calling\n",
- "Through the rain that keeps on falling\n",
- "A neon glow on wet pavement\n",
- "A silent wish, a time well spent\n",
- "\n",
- "[Chorus]\n",
- "We are running through the night\n",
- "Chasing shadows, chasing light\n",
- "In the rhythm of the street\n",
- "Where the heart and soul both meet\n",
- "\n",
- "[Outro]\n",
- "Fading away...\"\"\"\n",
- "\n",
- "tags = \"pop, synthwave, emotional, piano, female vocals\" # @param {type:\"string\"}\n",
- "\n",
- "duration_seconds = 30 # @param {type:\"slider\", min:10, max:180, step:10}\n",
- "guidance_scale = 2.1 # @param {type:\"slider\", min:1.0, max:5.0, step:0.1}\n",
- "temperature = 1.0 # @param {type:\"slider\", min:0.1, max:2.0, step:0.1}\n",
- "\n",
- "# Convert duration to ms\n",
- "max_audio_length_ms = duration_seconds * 1000\n",
- "\n",
- "output_path = \"./assets/my_generation.mp3\"\n",
- "os.makedirs(\"./assets\", exist_ok=True)\n",
- "\n",
- "print(\"Generating music... Please wait.\")\n",
- "\n",
- "with torch.no_grad():\n",
- " pipe(\n",
- " {\n",
- " \"lyrics\": lyrics,\n",
- " \"tags\": tags,\n",
- " },\n",
- " max_audio_length_ms=max_audio_length_ms,\n",
- " save_path=output_path,\n",
- " topk=50,\n",
- " temperature=temperature,\n",
- " cfg_scale=guidance_scale,\n",
- " )\n",
- "\n",
- "print(f\"✅ Generation complete: {output_path}\")"
- ]
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "You have set lazy_load=True. Unloading HeartCodec from device.\n",
+ "CUDA memory before unloading: 6.20 GB\n",
+ "CUDA memory after unloading: 0.01 GB\n",
+ "\u2705 Generation complete: ./assets/my_generation.mp3\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title \ud83c\udfb9 Inference Settings\n",
+ "\n",
+ "lyrics = \"\"\"[Intro]\n",
+ "\n",
+ "[Verse]\n",
+ "The city lights are calling\n",
+ "Through the rain that keeps on falling\n",
+ "A neon glow on wet pavement\n",
+ "A silent wish, a time well spent\n",
+ "\n",
+ "[Chorus]\n",
+ "We are running through the night\n",
+ "Chasing shadows, chasing light\n",
+ "In the rhythm of the street\n",
+ "Where the heart and soul both meet\n",
+ "\n",
+ "[Outro]\n",
+ "Fading away...\"\"\"\n",
+ "\n",
+ "tags = \"pop, synthwave, emotional, piano, female vocals\" # @param {type:\"string\"}\n",
+ "\n",
+ "duration_seconds = 30 # @param {type:\"slider\", min:10, max:180, step:10}\n",
+ "guidance_scale = 2.1 # @param {type:\"slider\", min:1.0, max:5.0, step:0.1}\n",
+ "temperature = 1.0 # @param {type:\"slider\", min:0.1, max:2.0, step:0.1}\n",
+ "\n",
+ "# Convert duration to ms\n",
+ "max_audio_length_ms = duration_seconds * 1000\n",
+ "\n",
+ "output_path = \"./assets/my_generation.mp3\"\n",
+ "os.makedirs(\"./assets\", exist_ok=True)\n",
+ "\n",
+ "print(\"Generating music... Please wait.\")\n",
+ "\n",
+ "with torch.no_grad():\n",
+ " pipe(\n",
+ " {\n",
+ " \"lyrics\": lyrics,\n",
+ " \"tags\": tags,\n",
+ " },\n",
+ " max_audio_length_ms=max_audio_length_ms,\n",
+ " save_path=output_path,\n",
+ " topk=50,\n",
+ " temperature=temperature,\n",
+ " cfg_scale=guidance_scale,\n",
+ " )\n",
+ "\n",
+ "print(f\"\u2705 Generation complete: {output_path}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 75
},
+ "id": "MoZxfvQNWRpt",
+ "outputId": "b9829605-ba90-4289-b44c-b4d3944e678d"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 75
- },
- "id": "MoZxfvQNWRpt",
- "outputId": "b9829605-ba90-4289-b44c-b4d3944e678d"
- },
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- ""
- ],
- "text/html": [
- "\n",
- " \n",
- " "
- ]
- },
- "metadata": {}
- }
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
],
- "source": [
- "# @title 🎧 Listen to the Song\n",
- "from IPython.display import Audio, display\n",
- "\n",
- "if os.path.exists(output_path):\n",
- " display(Audio(output_path))\n",
- "else:\n",
- " print(\"Audio file not found. Please run the generation cell first.\")"
+ "text/html": [
+ "\n",
+ " \n",
+ " "
]
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "# @title \ud83c\udfa7 Listen to the Song\n",
+ "from IPython.display import Audio, display\n",
+ "\n",
+ "if os.path.exists(output_path):\n",
+ " display(Audio(output_path))\n",
+ "else:\n",
+ " print(\"Audio file not found. Please run the generation cell first.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 17
},
+ "id": "P8ZgHP9gWRpt",
+ "outputId": "c71680f8-9743-4bc5-cb4f-8b9c1a7b588d"
+ },
+ "outputs": [
{
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 17
- },
- "id": "P8ZgHP9gWRpt",
- "outputId": "c71680f8-9743-4bc5-cb4f-8b9c1a7b588d"
- },
- "outputs": [
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- ""
- ],
- "application/javascript": [
- "\n",
- " async function download(id, filename, size) {\n",
- " if (!google.colab.kernel.accessAllowed) {\n",
- " return;\n",
- " }\n",
- " const div = document.createElement('div');\n",
- " const label = document.createElement('label');\n",
- " label.textContent = `Downloading \"${filename}\": `;\n",
- " div.appendChild(label);\n",
- " const progress = document.createElement('progress');\n",
- " progress.max = size;\n",
- " div.appendChild(progress);\n",
- " document.body.appendChild(div);\n",
- "\n",
- " const buffers = [];\n",
- " let downloaded = 0;\n",
- "\n",
- " const channel = await google.colab.kernel.comms.open(id);\n",
- " // Send a message to notify the kernel that we're ready.\n",
- " channel.send({})\n",
- "\n",
- " for await (const message of channel.messages) {\n",
- " // Send a message to notify the kernel that we're ready.\n",
- " channel.send({})\n",
- " if (message.buffers) {\n",
- " for (const buffer of message.buffers) {\n",
- " buffers.push(buffer);\n",
- " downloaded += buffer.byteLength;\n",
- " progress.value = downloaded;\n",
- " }\n",
- " }\n",
- " }\n",
- " const blob = new Blob(buffers, {type: 'application/binary'});\n",
- " const a = document.createElement('a');\n",
- " a.href = window.URL.createObjectURL(blob);\n",
- " a.download = filename;\n",
- " div.appendChild(a);\n",
- " a.click();\n",
- " div.remove();\n",
- " }\n",
- " "
- ]
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- ""
- ],
- "application/javascript": [
- "download(\"download_a80c3e12-7436-4a02-b10a-80fd88f924dd\", \"my_generation.mp3\", 482157)"
- ]
- },
- "metadata": {}
- }
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
],
- "source": [
- "# @title (Optional) Download MP3\n",
- "# @markdown Run this to download the generated file to your local computer.\n",
- "from google.colab import files\n",
- "if os.path.exists(output_path):\n",
- " files.download(output_path)"
+ "application/javascript": [
+ "\n",
+ " async function download(id, filename, size) {\n",
+ " if (!google.colab.kernel.accessAllowed) {\n",
+ " return;\n",
+ " }\n",
+ " const div = document.createElement('div');\n",
+ " const label = document.createElement('label');\n",
+ " label.textContent = `Downloading \"${filename}\": `;\n",
+ " div.appendChild(label);\n",
+ " const progress = document.createElement('progress');\n",
+ " progress.max = size;\n",
+ " div.appendChild(progress);\n",
+ " document.body.appendChild(div);\n",
+ "\n",
+ " const buffers = [];\n",
+ " let downloaded = 0;\n",
+ "\n",
+ " const channel = await google.colab.kernel.comms.open(id);\n",
+ " // Send a message to notify the kernel that we're ready.\n",
+ " channel.send({})\n",
+ "\n",
+ " for await (const message of channel.messages) {\n",
+ " // Send a message to notify the kernel that we're ready.\n",
+ " channel.send({})\n",
+ " if (message.buffers) {\n",
+ " for (const buffer of message.buffers) {\n",
+ " buffers.push(buffer);\n",
+ " downloaded += buffer.byteLength;\n",
+ " progress.value = downloaded;\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ " const blob = new Blob(buffers, {type: 'application/binary'});\n",
+ " const a = document.createElement('a');\n",
+ " a.href = window.URL.createObjectURL(blob);\n",
+ " a.download = filename;\n",
+ " div.appendChild(a);\n",
+ " a.click();\n",
+ " div.remove();\n",
+ " }\n",
+ " "
]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "name": "python3"
+ },
+ "metadata": {}
},
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
- },
- "colab": {
- "provenance": [],
- "collapsed_sections": [
- "5NtsvvuRWRpr"
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
],
- "gpuType": "T4"
- },
- "accelerator": "GPU",
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "a9890e1691ff4c8d89cd5a8e545bed0e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_d7d86758aafb4b64adeebb2e81edc71f",
- "IPY_MODEL_750d9ec077c44b6a9f04f222cc51a333",
- "IPY_MODEL_36486be0a6ed4be99f69b7dac85c11f6"
- ],
- "layout": "IPY_MODEL_22ae4ffc8f8847898a77c73f4e2cf2ef"
- }
- },
- "d7d86758aafb4b64adeebb2e81edc71f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_69644cf1574d4842ae5e53bfe713d655",
- "placeholder": "β",
- "style": "IPY_MODEL_77d94de8de3c46da85b6b4310b5f8be2",
- "value": "Loadingβcheckpointβshards:β100%"
- }
- },
- "750d9ec077c44b6a9f04f222cc51a333": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_c5b1d0249fff4d5a8c7a32e674229a6a",
- "max": 4,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_c4014d4bebb64d558ced03b470c38543",
- "value": 4
- }
- },
- "36486be0a6ed4be99f69b7dac85c11f6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_460a689a399f4d9a86cc6060dcfb598d",
- "placeholder": "β",
- "style": "IPY_MODEL_138679070de94d4c9663bfe84dbfcf19",
- "value": "β4/4β[01:04<00:00,β13.86s/it]"
- }
- },
- "22ae4ffc8f8847898a77c73f4e2cf2ef": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "69644cf1574d4842ae5e53bfe713d655": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "77d94de8de3c46da85b6b4310b5f8be2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "c5b1d0249fff4d5a8c7a32e674229a6a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "c4014d4bebb64d558ced03b470c38543": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "460a689a399f4d9a86cc6060dcfb598d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "138679070de94d4c9663bfe84dbfcf19": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
+ "application/javascript": [
+ "download(\"download_a80c3e12-7436-4a02-b10a-80fd88f924dd\", \"my_generation.mp3\", 482157)"
+ ]
+ },
+ "metadata": {}
}
+ ],
+ "source": [
+ "# @title (Optional) Download MP3\n",
+ "# @markdown Run this to download the generated file to your local computer.\n",
+ "from google.colab import files\n",
+ "if os.path.exists(output_path):\n",
+ " files.download(output_path)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ },
+ "colab": {
+ "provenance": [],
+ "collapsed_sections": [
+ "5NtsvvuRWRpr"
+ ],
+ "gpuType": "T4"
},
- "nbformat": 4,
- "nbformat_minor": 0
+ "accelerator": "GPU"
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
}
\ No newline at end of file