|
| 1 | +# Qwen2.5-VL-3B-Instruct Deployment Guide (ComfyUI + Intel GPU + Linux) |
| 2 | + |
| 3 | +This document provides comprehensive instructions for deploying the `Qwen2.5-VL-3B-Instruct` multimodal LLM on Linux systems with `Intel GPU` acceleration via a `ComfyUI` workflow. |
| 4 | + |
| 5 | +## 🛠️ Installation Procedure |
| 6 | +### 1. Environment Setup |
| 7 | +```bash |
| 8 | +# Install system dependencies |
| 9 | +sudo apt update && sudo apt install -y \ |
| 10 | + git python3-pip python3-venv \ |
| 11 | + ocl-icd-opencl-dev |
| 12 | + |
| 13 | +# Configure Intel GPU drivers (if not present) |
| 14 | +sudo apt install -y \ |
| 15 | + intel-opencl-icd \ |
| 16 | + intel-level-zero-gpu \ |
| 17 | + level-zero |
| 18 | +``` |
| 19 | + |
| 20 | +### 2. Conda Environment Configuration |
| 21 | +```bash |
| 22 | +conda create -n comfyqwen python=3.11 |
| 23 | +conda activate comfyqwen |
| 24 | +``` |
| 25 | + |
| 26 | +### 3. ComfyUI Installation |
| 27 | +```bash |
| 28 | +git clone https://github.com/comfyanonymous/ComfyUI.git |
| 29 | +cd ./ComfyUI |
| 30 | + |
| 31 | +# Install Intel-optimized PyTorch |
| 32 | +pip install torch torchvision torchaudio \ |
| 33 | + --index-url https://download.pytorch.org/whl/xpu |
| 34 | + |
| 35 | +# For nightly builds with potential performance improvements: |
| 36 | +# pip install --pre torch torchvision torchaudio \ |
| 37 | +# --index-url https://download.pytorch.org/whl/nightly/xpu |
| 38 | + |
| 39 | +pip install -r requirements.txt |
| 40 | +``` |
| 41 | + |
| 42 | +### 4. Qwen2.5-VL Custom Node Deployment |
| 43 | +```bash |
| 44 | +# Download node definition files |
| 45 | +git clone https://github.com/IuvenisSapiens/ComfyUI_Qwen2_5-VL-Instruct |
| 46 | + |
| 47 | +# Move the ComfyUI_Qwen2_5-VL-Instruct folder into the ComfyUI/custom_nodes/ directory |
| 48 | + |
| 49 | +# Place the downloaded Qwen2.5-VL-3B-Instruct model folder into ComfyUI/models/prompt_generator/ |
| 50 | +# If prompt_generator subdirectory doesn't exist under models, please create it first |
| 51 | +``` |
| 52 | +<details><summary>ComfyUI_Qwen2_5-VL-Instruct_workflow.json</summary> |
| 53 | +{ |
| 54 | + "id": "9f2dfc63-3d19-433d-a7c0-49d83464f553", |
| 55 | + "revision": 0, |
| 56 | + "last_node_id": 59, |
| 57 | + "last_link_id": 72, |
| 58 | + "nodes": [ |
| 59 | + { |
| 60 | + "id": 56, |
| 61 | + "type": "Qwen2_VQA", |
| 62 | + "pos": [ |
| 63 | + 199.93017578125, |
| 64 | + 46.947696685791016 |
| 65 | + ], |
| 66 | + "size": [ |
| 67 | + 322.1059265136719, |
| 68 | + 348 |
| 69 | + ], |
| 70 | + "flags": {}, |
| 71 | + "order": 2, |
| 72 | + "mode": 0, |
| 73 | + "inputs": [ |
| 74 | + { |
| 75 | + "name": "source_path", |
| 76 | + "shape": 7, |
| 77 | + "type": "PATH", |
| 78 | + "link": 70 |
| 79 | + }, |
| 80 | + { |
| 81 | + "name": "image", |
| 82 | + "shape": 7, |
| 83 | + "type": "IMAGE", |
| 84 | + "link": null |
| 85 | + } |
| 86 | + ], |
| 87 | + "outputs": [ |
| 88 | + { |
| 89 | + "name": "STRING", |
| 90 | + "type": "STRING", |
| 91 | + "slot_index": 0, |
| 92 | + "links": [ |
| 93 | + 72 |
| 94 | + ] |
| 95 | + } |
| 96 | + ], |
| 97 | + "properties": { |
| 98 | + "Node name for S&R": "Qwen2_VQA", |
| 99 | + "widget_ue_connectable": {} |
| 100 | + }, |
| 101 | + "widgets_values": [ |
| 102 | + "Describe the video in detail", |
| 103 | + "Qwen2.5-VL-3B-Instruct", |
| 104 | + "none", |
| 105 | + false, |
| 106 | + 0.7, |
| 107 | + 2048, |
| 108 | + 200704, |
| 109 | + 1003520, |
| 110 | + 1444, |
| 111 | + "randomize", |
| 112 | + "eager" |
| 113 | + ] |
| 114 | + }, |
| 115 | + { |
| 116 | + "id": 59, |
| 117 | + "type": "PreviewAny", |
| 118 | + "pos": [ |
| 119 | + 702.7207641601562, |
| 120 | + 61.4115104675293 |
| 121 | + ], |
| 122 | + "size": [ |
| 123 | + 140, |
| 124 | + 76 |
| 125 | + ], |
| 126 | + "flags": {}, |
| 127 | + "order": 3, |
| 128 | + "mode": 0, |
| 129 | + "inputs": [ |
| 130 | + { |
| 131 | + "name": "source", |
| 132 | + "type": "*", |
| 133 | + "link": 72 |
| 134 | + } |
| 135 | + ], |
| 136 | + "outputs": [], |
| 137 | + "properties": { |
| 138 | + "Node name for S&R": "PreviewAny" |
| 139 | + }, |
| 140 | + "widgets_values": [] |
| 141 | + }, |
| 142 | + { |
| 143 | + "id": 58, |
| 144 | + "type": "VideoLoader", |
| 145 | + "pos": [ |
| 146 | + -513.0911254882812, |
| 147 | + 130.9906768798828 |
| 148 | + ], |
| 149 | + "size": [ |
| 150 | + 430.6719665527344, |
| 151 | + 452.4115295410156 |
| 152 | + ], |
| 153 | + "flags": {}, |
| 154 | + "order": 0, |
| 155 | + "mode": 0, |
| 156 | + "inputs": [], |
| 157 | + "outputs": [ |
| 158 | + { |
| 159 | + "name": "VIDEO", |
| 160 | + "type": "VIDEO", |
| 161 | + "links": null |
| 162 | + }, |
| 163 | + { |
| 164 | + "name": "PATH", |
| 165 | + "type": "PATH", |
| 166 | + "links": [ |
| 167 | + 71 |
| 168 | + ] |
| 169 | + } |
| 170 | + ], |
| 171 | + "properties": { |
| 172 | + "Node name for S&R": "VideoLoader", |
| 173 | + "widget_ue_connectable": {} |
| 174 | + }, |
| 175 | + "widgets_values": [ |
| 176 | + "19_raw.mp4", |
| 177 | + "image" |
| 178 | + ] |
| 179 | + }, |
| 180 | + { |
| 181 | + "id": 57, |
| 182 | + "type": "MultiplePathsInput", |
| 183 | + "pos": [ |
| 184 | + -49.730098724365234, |
| 185 | + 137.55857849121094 |
| 186 | + ], |
| 187 | + "size": [ |
| 188 | + 210, |
| 189 | + 82 |
| 190 | + ], |
| 191 | + "flags": {}, |
| 192 | + "order": 1, |
| 193 | + "mode": 0, |
| 194 | + "inputs": [ |
| 195 | + { |
| 196 | + "name": "path_1", |
| 197 | + "type": "PATH", |
| 198 | + "link": 71 |
| 199 | + } |
| 200 | + ], |
| 201 | + "outputs": [ |
| 202 | + { |
| 203 | + "name": "paths", |
| 204 | + "type": "PATH", |
| 205 | + "slot_index": 0, |
| 206 | + "links": [ |
| 207 | + 70 |
| 208 | + ] |
| 209 | + } |
| 210 | + ], |
| 211 | + "properties": { |
| 212 | + "Node name for S&R": "MultiplePathsInput", |
| 213 | + "widget_ue_connectable": {} |
| 214 | + }, |
| 215 | + "widgets_values": [ |
| 216 | + 1 |
| 217 | + ] |
| 218 | + } |
| 219 | + ], |
| 220 | + "links": [ |
| 221 | + [ |
| 222 | + 70, |
| 223 | + 57, |
| 224 | + 0, |
| 225 | + 56, |
| 226 | + 0, |
| 227 | + "PATH" |
| 228 | + ], |
| 229 | + [ |
| 230 | + 71, |
| 231 | + 58, |
| 232 | + 1, |
| 233 | + 57, |
| 234 | + 0, |
| 235 | + "PATH" |
| 236 | + ], |
| 237 | + [ |
| 238 | + 72, |
| 239 | + 56, |
| 240 | + 0, |
| 241 | + 59, |
| 242 | + 0, |
| 243 | + "*" |
| 244 | + ] |
| 245 | + ], |
| 246 | + "groups": [], |
| 247 | + "config": {}, |
| 248 | + "extra": { |
| 249 | + "ds": { |
| 250 | + "scale": 0.9646149645000006, |
| 251 | + "offset": [ |
| 252 | + 788.9511067206646, |
| 253 | + 382.6344411516708 |
| 254 | + ] |
| 255 | + }, |
| 256 | + "frontendVersion": "1.24.4", |
| 257 | + "ue_links": [], |
| 258 | + "links_added_by_ue": [], |
| 259 | + "VHS_latentpreview": false, |
| 260 | + "VHS_latentpreviewrate": 0, |
| 261 | + "VHS_MetadataImage": true, |
| 262 | + "VHS_KeepIntermediate": true |
| 263 | + }, |
| 264 | + "version": 0.4 |
| 265 | +} |
| 266 | +</details> |
| 267 | + |
| 268 | +## 🚀 Launching ComfyUI |
| 269 | +```bash |
| 270 | +python main.py |
| 271 | +``` |
| 272 | +Access the web interface at: `http://localhost:8188` |
| 273 | + |
| 274 | +## Post-Installation Configuration |
| 275 | +1. Replace the final component node with `Preview Any` in your workflow |
| 276 | +2. Reference model path: `./models/prompt_generator/Qwen2.5-VL-3B-Instruct/` |
| 277 | + |
| 278 | + |
| 279 | + |
| 280 | +## References |
| 281 | +- [ComfyUI GitHub](https://github.com/comfyanonymous/ComfyUI) |
| 282 | +- [Intel PyTorch XPU](https://intel.github.io/intel-extension-for-pytorch/) |
| 283 | +- [Qwen2.5-VL-3B-Instruct Model Card](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct) |
| 284 | + |
0 commit comments