16 changes: 10 additions & 6 deletions frontend/src/components/ComplexFields.tsx
@@ -49,7 +49,7 @@ export interface SchemaComplexFieldContext {
vaceUseInputVideo?: boolean;
onVaceUseInputVideoChange?: (enabled: boolean) => void;
vaceContextScaleSlider?: SliderState;
- quantization?: "fp8_e4m3fn" | null;
+ quantization?: "fp8_e4m3fn" | "nvfp4" | null;
loras?: LoRAConfig[];
onLorasChange?: (loras: LoRAConfig[]) => void;
loraMergeStrategy?: LoraMergeStrategy;
@@ -63,10 +63,11 @@ export interface SchemaComplexFieldContext {
noiseScaleSlider?: SliderState;
noiseController?: boolean;
onNoiseControllerChange?: (enabled: boolean) => void;
- onQuantizationChange?: (q: "fp8_e4m3fn" | null) => void;
+ onQuantizationChange?: (q: "fp8_e4m3fn" | "nvfp4" | null) => void;
inputMode?: "text" | "video";
supportsNoiseControls?: boolean;
supportsQuantization?: boolean;
+ supportsNvfp4?: boolean;
supportsCacheManagement?: boolean;
supportsKvCacheBias?: boolean;
isStreaming?: boolean;
@@ -156,7 +157,7 @@ export function SchemaComplexField({
<div className="flex items-start gap-1.5 p-2 rounded-md bg-amber-500/10 border border-amber-500/20">
<Info className="h-3.5 w-3.5 mt-0.5 shrink-0 text-amber-600 dark:text-amber-500" />
<p className="text-xs text-amber-600 dark:text-amber-500">
- VACE is incompatible with FP8 quantization. Please disable
+ VACE is incompatible with quantization. Please disable
quantization to use VACE.
</p>
</div>
@@ -482,27 +483,30 @@ export function SchemaComplexField({
value={ctx.quantization ?? "none"}
onValueChange={v =>
ctx.onQuantizationChange?.(
v === "none" ? null : (v as "fp8_e4m3fn")
v === "none" ? null : (v as "fp8_e4m3fn" | "nvfp4")
)
}
disabled={
(ctx.isStreaming ?? false) || (ctx.vaceEnabled ?? false)
}
>
<SelectTrigger className="w-[140px] h-7">
<SelectTrigger className="w-[180px] h-7">
<SelectValue />
</SelectTrigger>
<SelectContent>
<SelectItem value="none">None</SelectItem>
<SelectItem value="fp8_e4m3fn">
fp8_e4m3fn (Dynamic)
</SelectItem>
+ {ctx.supportsNvfp4 && (
+   <SelectItem value="nvfp4">nvfp4 (Blackwell)</SelectItem>
+ )}
</SelectContent>
</Select>
</div>
{ctx.vaceEnabled && (
<p className="text-xs text-muted-foreground">
- Disabled because VACE is enabled. Disable VACE to use FP8
+ Disabled because VACE is enabled. Disable VACE to use
quantization.
</p>
)}
25 changes: 18 additions & 7 deletions frontend/src/components/SettingsPanel.tsx
@@ -75,8 +75,8 @@ interface SettingsPanelProps {
onNoiseControllerChange?: (enabled: boolean) => void;
manageCache?: boolean;
onManageCacheChange?: (enabled: boolean) => void;
- quantization?: "fp8_e4m3fn" | null;
- onQuantizationChange?: (quantization: "fp8_e4m3fn" | null) => void;
+ quantization?: "fp8_e4m3fn" | "nvfp4" | null;
+ onQuantizationChange?: (quantization: "fp8_e4m3fn" | "nvfp4" | null) => void;
kvCacheAttentionBias?: number;
onKvCacheAttentionBiasChange?: (bias: number) => void;
onResetCache?: () => void;
@@ -90,6 +90,8 @@ interface SettingsPanelProps {
// Spout settings
spoutSender?: SettingsState["spoutSender"];
onSpoutSenderChange?: (spoutSender: SettingsState["spoutSender"]) => void;
+ // Whether GPU supports NVFP4 quantization (Blackwell SM >= 10.0)
+ supportsNvfp4?: boolean;
// Whether Spout is available (server-side detection for native Windows, not WSL)
spoutAvailable?: boolean;
// VACE settings
@@ -143,6 +145,7 @@ export function SettingsPanel({
loraMergeStrategy = "permanent_merge",
inputMode,
supportsNoiseControls = false,
+ supportsNvfp4 = false,
spoutSender,
onSpoutSenderChange,
spoutAvailable = false,
@@ -525,6 +528,7 @@ export function SettingsPanel({
supportsNoiseControls,
supportsQuantization:
pipelines?.[pipelineId]?.supportsQuantization,
+ supportsNvfp4,
supportsCacheManagement:
pipelines?.[pipelineId]?.supportsCacheManagement,
supportsKvCacheBias: pipelines?.[pipelineId]?.supportsKvCacheBias,
@@ -617,8 +621,8 @@ export function SettingsPanel({
<div className="flex items-start gap-1.5 p-2 rounded-md bg-amber-500/10 border border-amber-500/20">
<Info className="h-3.5 w-3.5 mt-0.5 shrink-0 text-amber-600 dark:text-amber-500" />
<p className="text-xs text-amber-600 dark:text-amber-500">
- VACE is incompatible with FP8 quantization. Please
- disable quantization to use VACE.
+ VACE is incompatible with quantization. Please disable
+ quantization to use VACE.
</p>
</div>
)}
@@ -951,27 +955,34 @@ export function SettingsPanel({
value={quantization || "none"}
onValueChange={value => {
onQuantizationChange?.(
value === "none" ? null : (value as "fp8_e4m3fn")
value === "none"
? null
: (value as "fp8_e4m3fn" | "nvfp4")
);
}}
disabled={isStreaming || vaceEnabled}
>
<SelectTrigger className="w-[140px] h-7">
<SelectTrigger className="w-[180px] h-7">
<SelectValue />
</SelectTrigger>
<SelectContent>
<SelectItem value="none">None</SelectItem>
<SelectItem value="fp8_e4m3fn">
fp8_e4m3fn (Dynamic)
</SelectItem>
+ {supportsNvfp4 && (
+   <SelectItem value="nvfp4">
+     nvfp4 (Blackwell)
+   </SelectItem>
+ )}
</SelectContent>
</Select>
</div>
{/* Note when quantization is disabled due to VACE */}
{vaceEnabled && (
<p className="text-xs text-muted-foreground">
Disabled because VACE is enabled. Disable VACE to use
- FP8 quantization.
+ quantization.
</p>
)}
</div>
18 changes: 16 additions & 2 deletions frontend/src/hooks/useStreamState.ts
@@ -38,7 +38,7 @@ function getFallbackDefaults(mode?: InputMode) {
noiseController: isVideoMode ? true : undefined,
defaultTemporalInterpolationSteps: undefined as number | undefined,
inputMode: effectiveMode,
- quantization: undefined as "fp8_e4m3fn" | undefined,
+ quantization: undefined as "fp8_e4m3fn" | "nvfp4" | undefined,
};
}

@@ -125,7 +125,7 @@ export function useStreamState() {
noiseController,
defaultTemporalInterpolationSteps,
inputMode: effectiveMode,
- quantization: undefined as "fp8_e4m3fn" | undefined,
+ quantization: undefined as "fp8_e4m3fn" | "nvfp4" | undefined,
};
}
// Fallback to derived defaults if schemas not loaded
@@ -334,6 +334,20 @@ export function useStreamState() {
}
}, [settings.pipelineId, hardwareInfo, pipelineSchemas]);

+ // Reset nvfp4 selection if GPU doesn't support it (e.g. from persisted state)
+ useEffect(() => {
+   if (
+     hardwareInfo &&
+     !hardwareInfo.supports_nvfp4 &&
+     settings.quantization === "nvfp4"
+   ) {
+     setSettings(prev => ({
+       ...prev,
+       quantization: "fp8_e4m3fn",
+     }));
+   }
+ }, [hardwareInfo, settings.quantization]);

// Set recommended VACE enabled state based on pipeline schema and available VRAM
// VACE is enabled by default, but disabled if VRAM is below recommended_quantization_vram_threshold
useEffect(() => {
1 change: 1 addition & 0 deletions frontend/src/lib/api.ts
@@ -206,6 +206,7 @@ export const downloadPipelineModels = async (
export interface HardwareInfoResponse {
vram_gb: number | null;
spout_available: boolean;
+ supports_nvfp4: boolean;
}

export const getHardwareInfo = async (): Promise<HardwareInfoResponse> => {
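Note: the server-side detection that fills supports_nvfp4 is not part of this diff. Based on the "Blackwell SM >= 10.0" comment added in SettingsPanel.tsx, a minimal sketch of what the hardware-info endpoint might check (the helper name and its placement are assumptions):

    import torch

    def detect_supports_nvfp4() -> bool:
        # Hypothetical helper: NVFP4 kernels target Blackwell-class GPUs,
        # i.e. CUDA compute capability (SM) 10.0 or newer.
        if not torch.cuda.is_available():
            return False
        major, minor = torch.cuda.get_device_capability()
        return (major, minor) >= (10, 0)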
6 changes: 5 additions & 1 deletion frontend/src/pages/StreamPage.tsx
@@ -104,6 +104,7 @@ export function StreamPage() {
getDefaults,
supportsNoiseControls,
spoutAvailable,
+ hardwareInfo,
refreshPipelineSchemas,
refreshHardwareInfo,
} = useStreamState();
@@ -588,7 +589,9 @@ export function StreamPage() {
});
};

- const handleQuantizationChange = (quantization: "fp8_e4m3fn" | null) => {
+ const handleQuantizationChange = (
+   quantization: "fp8_e4m3fn" | "nvfp4" | null
+ ) => {
updateSettings({ quantization });
// Note: This setting requires pipeline reload, so we don't send parameter update here
};
@@ -1532,6 +1535,7 @@ export function StreamPage() {
loraMergeStrategy={settings.loraMergeStrategy ?? "permanent_merge"}
inputMode={settings.inputMode}
supportsNoiseControls={supportsNoiseControls(settings.pipelineId)}
+ supportsNvfp4={hardwareInfo?.supports_nvfp4 ?? false}
spoutSender={settings.spoutSender}
onSpoutSenderChange={handleSpoutSenderChange}
spoutAvailable={spoutAvailable}
2 changes: 1 addition & 1 deletion frontend/src/types/index.ts
@@ -55,7 +55,7 @@ export interface SettingsState {
noiseScale?: number;
noiseController?: boolean;
manageCache?: boolean;
- quantization?: "fp8_e4m3fn" | null;
+ quantization?: "fp8_e4m3fn" | "nvfp4" | null;
kvCacheAttentionBias?: number;
paused?: boolean;
loras?: LoRAConfig[];
5 changes: 5 additions & 0 deletions pyproject.toml
@@ -36,6 +36,7 @@ dependencies = [
"uvicorn>=0.35.0",
"torch==2.9.1",
"torchvision==0.24.1",
"torchaudio==2.9.1",
"easydict>=1.13",
"diffusers>=0.31.0",
"ftfy>=6.3.1",
@@ -57,6 +58,7 @@ dependencies = [
"triton-windows==3.5.1.post24; sys_platform == 'win32'",
"SpoutGL>=0.1.1; sys_platform == 'win32'",
"PyOpenGL>=3.1.10; sys_platform == 'win32'",
"comfy-kitchen[cublas]>=0.1.0; sys_platform == 'linux' or sys_platform == 'win32'",
]

[project.optional-dependencies]
@@ -95,6 +97,9 @@ torch = [
torchvision = [
{ index = "pytorch-cu128", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
]
+ torchaudio = [
+   { index = "pytorch-cu128", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+ ]
flash-attn = [
# Prebuilt Linux wheels from https://github.com/Dao-AILab/flash-attention
{ url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.9cxx11abiTRUE-cp312-cp312-linux_x86_64.whl", marker = "sys_platform == 'linux'" },
1 change: 1 addition & 0 deletions src/scope/core/pipelines/enums.py
@@ -12,6 +12,7 @@ class Quantization(str, Enum):
"""Quantization method enumeration."""

FP8_E4M3FN = "fp8_e4m3fn"
NVFP4 = "nvfp4"


class VaeType(str, Enum):
25 changes: 2 additions & 23 deletions src/scope/core/pipelines/krea_realtime_video/pipeline.py
@@ -15,6 +15,7 @@
)
from ..interface import Pipeline, Requirements
from ..process import postprocess_chunk
+ from ..quantization_utils import apply_quantization
from ..utils import Quantization, load_model_config, validate_resolution
from ..wan2_1.components import WanDiffusionWrapper, WanTextEncoderWrapper
from ..wan2_1.lora.mixin import LoRAEnabledPipeline
@@ -111,29 +112,7 @@ def __init__(
# Initialize optional LoRA adapters on the underlying model AFTER VACE.
generator.model = self._init_loras(config, generator.model)

- if quantization == Quantization.FP8_E4M3FN:
-     # Cast before optional quantization
-     generator = generator.to(dtype=dtype)
-
-     start = time.time()
-
-     from torchao.quantization.quant_api import (
-         Float8DynamicActivationFloat8WeightConfig,
-         PerTensor,
-         quantize_,
-     )
-
-     # Move to target device during quantization
-     # Defaults to using fp8_e4m3fn for both weights and activations
-     quantize_(
-         generator,
-         Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()),
-         device=device,
-     )
-
-     print(f"Quantized diffusion model to fp8 in {time.time() - start:.3f}s")
- else:
-     generator = generator.to(device=device, dtype=dtype)
+ generator = apply_quantization(generator, quantization, device, dtype)

if compile:
# Only compile the attention blocks
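This diff and the two pipeline diffs that follow collapse the same inline torchao FP8 branch into a shared apply_quantization helper. The new quantization_utils module itself is not shown in this PR; a minimal sketch consistent with the removed inline code might look like the following. The nvfp4 branch is only a placeholder: the diff shows that the comfy-kitchen[cublas] dependency was added, but not how it is invoked, so no API is assumed for it.

    import time

    from .utils import Quantization


    def apply_quantization(generator, quantization, device, dtype):
        """Quantize the generator if requested, otherwise just move it to the device."""
        if quantization == Quantization.FP8_E4M3FN:
            # Cast before optional quantization (as in the removed inline code)
            generator = generator.to(dtype=dtype)

            start = time.time()

            from torchao.quantization.quant_api import (
                Float8DynamicActivationFloat8WeightConfig,
                PerTensor,
                quantize_,
            )

            # Moves to the target device during quantization; defaults to
            # fp8_e4m3fn for both weights and activations.
            quantize_(
                generator,
                Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()),
                device=device,
            )

            print(f"Quantized diffusion model to fp8 in {time.time() - start:.3f}s")
        elif quantization == Quantization.NVFP4:
            # Placeholder: the real nvfp4 path presumably dispatches to the
            # comfy-kitchen kernels added in pyproject.toml, but that code is
            # not visible in this diff.
            raise NotImplementedError("nvfp4 quantization path not shown in this diff")
        else:
            generator = generator.to(device=device, dtype=dtype)

        return generator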
25 changes: 2 additions & 23 deletions src/scope/core/pipelines/longlive/pipeline.py
@@ -15,6 +15,7 @@
)
from ..interface import Pipeline, Requirements
from ..process import postprocess_chunk
+ from ..quantization_utils import apply_quantization
from ..utils import Quantization, load_model_config, validate_resolution
from ..wan2_1.components import WanDiffusionWrapper, WanTextEncoderWrapper
from ..wan2_1.lora.mixin import LoRAEnabledPipeline
@@ -110,29 +111,7 @@ def __init__(
# This is additive and does not replace the original LongLive performance LoRA.
generator.model = self._init_loras(config, generator.model)

- if quantization == Quantization.FP8_E4M3FN:
-     # Cast before optional quantization
-     generator = generator.to(dtype=dtype)
-
-     start = time.time()
-
-     from torchao.quantization.quant_api import (
-         Float8DynamicActivationFloat8WeightConfig,
-         PerTensor,
-         quantize_,
-     )
-
-     # Move to target device during quantization
-     # Defaults to using fp8_e4m3fn for both weights and activations
-     quantize_(
-         generator,
-         Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()),
-         device=device,
-     )
-
-     print(f"Quantized diffusion model to fp8 in {time.time() - start:.3f}s")
- else:
-     generator = generator.to(device=device, dtype=dtype)
+ generator = apply_quantization(generator, quantization, device, dtype)

start = time.time()
text_encoder = WanTextEncoderWrapper(
25 changes: 2 additions & 23 deletions src/scope/core/pipelines/memflow/pipeline.py
@@ -15,6 +15,7 @@
)
from ..interface import Pipeline, Requirements
from ..process import postprocess_chunk
+ from ..quantization_utils import apply_quantization
from ..utils import Quantization, load_model_config, validate_resolution
from ..wan2_1.components import WanDiffusionWrapper, WanTextEncoderWrapper
from ..wan2_1.lora.mixin import LoRAEnabledPipeline
@@ -110,29 +111,7 @@ def __init__(
# This is additive and does not replace the original MemFlow performance LoRA.
generator.model = self._init_loras(config, generator.model)

- if quantization == Quantization.FP8_E4M3FN:
-     # Cast before optional quantization
-     generator = generator.to(dtype=dtype)
-
-     start = time.time()
-
-     from torchao.quantization.quant_api import (
-         Float8DynamicActivationFloat8WeightConfig,
-         PerTensor,
-         quantize_,
-     )
-
-     # Move to target device during quantization
-     # Defaults to using fp8_e4m3fn for both weights and activations
-     quantize_(
-         generator,
-         Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()),
-         device=device,
-     )
-
-     print(f"Quantized diffusion model to fp8 in {time.time() - start:.3f}s")
- else:
-     generator = generator.to(device=device, dtype=dtype)
+ generator = apply_quantization(generator, quantization, device, dtype)

start = time.time()
text_encoder = WanTextEncoderWrapper(