From b5ad03c8d8ed1770d0958c97c716429b74aab21a Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 10 Feb 2026 19:44:33 -0500 Subject: [PATCH 1/5] Added nvfp4 official support. Signed-off-by: BuffMcBigHuge --- frontend/src/components/ComplexFields.tsx | 15 +- frontend/src/components/SettingsPanel.tsx | 15 +- frontend/src/hooks/useStreamState.ts | 4 +- frontend/src/pages/StreamPage.tsx | 2 +- frontend/src/types/index.ts | 2 +- src/scope/core/pipelines/enums.py | 1 + .../pipelines/krea_realtime_video/pipeline.py | 25 +- src/scope/core/pipelines/longlive/pipeline.py | 25 +- src/scope/core/pipelines/memflow/pipeline.py | 25 +- .../core/pipelines/quantization_utils.py | 474 ++++++++++++++++++ .../core/pipelines/reward_forcing/pipeline.py | 25 +- .../pipelines/streamdiffusionv2/pipeline.py | 25 +- src/scope/core/pipelines/utils.py | 3 + src/scope/core/pipelines/wan2_1/vace/mixin.py | 57 +-- 14 files changed, 534 insertions(+), 164 deletions(-) create mode 100644 src/scope/core/pipelines/quantization_utils.py diff --git a/frontend/src/components/ComplexFields.tsx b/frontend/src/components/ComplexFields.tsx index 77e162d32..61860e571 100644 --- a/frontend/src/components/ComplexFields.tsx +++ b/frontend/src/components/ComplexFields.tsx @@ -49,7 +49,7 @@ export interface SchemaComplexFieldContext { vaceUseInputVideo?: boolean; onVaceUseInputVideoChange?: (enabled: boolean) => void; vaceContextScaleSlider?: SliderState; - quantization?: "fp8_e4m3fn" | null; + quantization?: "fp8_e4m3fn" | "nvfp4" | null; loras?: LoRAConfig[]; onLorasChange?: (loras: LoRAConfig[]) => void; loraMergeStrategy?: LoraMergeStrategy; @@ -63,7 +63,7 @@ export interface SchemaComplexFieldContext { noiseScaleSlider?: SliderState; noiseController?: boolean; onNoiseControllerChange?: (enabled: boolean) => void; - onQuantizationChange?: (q: "fp8_e4m3fn" | null) => void; + onQuantizationChange?: (q: "fp8_e4m3fn" | "nvfp4" | null) => void; inputMode?: "text" | "video"; supportsNoiseControls?: boolean; supportsQuantization?: boolean; @@ -156,7 +156,7 @@ export function SchemaComplexField({

- VACE is incompatible with FP8 quantization. Please disable + VACE is incompatible with quantization. Please disable quantization to use VACE.

@@ -482,14 +482,14 @@ export function SchemaComplexField({ value={ctx.quantization ?? "none"} onValueChange={v => ctx.onQuantizationChange?.( - v === "none" ? null : (v as "fp8_e4m3fn") + v === "none" ? null : (v as "fp8_e4m3fn" | "nvfp4") ) } disabled={ (ctx.isStreaming ?? false) || (ctx.vaceEnabled ?? false) } > - + @@ -497,12 +497,15 @@ export function SchemaComplexField({ fp8_e4m3fn (Dynamic) + + nvfp4 (Blackwell) + {ctx.vaceEnabled && (

- Disabled because VACE is enabled. Disable VACE to use FP8 + Disabled because VACE is enabled. Disable VACE to use quantization.

)} diff --git a/frontend/src/components/SettingsPanel.tsx b/frontend/src/components/SettingsPanel.tsx index 3125e22ca..e58e7aa75 100644 --- a/frontend/src/components/SettingsPanel.tsx +++ b/frontend/src/components/SettingsPanel.tsx @@ -75,8 +75,8 @@ interface SettingsPanelProps { onNoiseControllerChange?: (enabled: boolean) => void; manageCache?: boolean; onManageCacheChange?: (enabled: boolean) => void; - quantization?: "fp8_e4m3fn" | null; - onQuantizationChange?: (quantization: "fp8_e4m3fn" | null) => void; + quantization?: "fp8_e4m3fn" | "nvfp4" | null; + onQuantizationChange?: (quantization: "fp8_e4m3fn" | "nvfp4" | null) => void; kvCacheAttentionBias?: number; onKvCacheAttentionBiasChange?: (bias: number) => void; onResetCache?: () => void; @@ -617,7 +617,7 @@ export function SettingsPanel({

- VACE is incompatible with FP8 quantization. Please + VACE is incompatible with quantization. Please disable quantization to use VACE.

@@ -951,12 +951,12 @@ export function SettingsPanel({ value={quantization || "none"} onValueChange={value => { onQuantizationChange?.( - value === "none" ? null : (value as "fp8_e4m3fn") + value === "none" ? null : (value as "fp8_e4m3fn" | "nvfp4") ); }} disabled={isStreaming || vaceEnabled} > - + @@ -964,6 +964,9 @@ export function SettingsPanel({ fp8_e4m3fn (Dynamic) + + nvfp4 (Blackwell) + @@ -971,7 +974,7 @@ export function SettingsPanel({ {vaceEnabled && (

Disabled because VACE is enabled. Disable VACE to use - FP8 quantization. + quantization.

)} diff --git a/frontend/src/hooks/useStreamState.ts b/frontend/src/hooks/useStreamState.ts index 3f76c424d..d4e827005 100644 --- a/frontend/src/hooks/useStreamState.ts +++ b/frontend/src/hooks/useStreamState.ts @@ -38,7 +38,7 @@ function getFallbackDefaults(mode?: InputMode) { noiseController: isVideoMode ? true : undefined, defaultTemporalInterpolationSteps: undefined as number | undefined, inputMode: effectiveMode, - quantization: undefined as "fp8_e4m3fn" | undefined, + quantization: undefined as "fp8_e4m3fn" | "nvfp4" | undefined, }; } @@ -125,7 +125,7 @@ export function useStreamState() { noiseController, defaultTemporalInterpolationSteps, inputMode: effectiveMode, - quantization: undefined as "fp8_e4m3fn" | undefined, + quantization: undefined as "fp8_e4m3fn" | "nvfp4" | undefined, }; } // Fallback to derived defaults if schemas not loaded diff --git a/frontend/src/pages/StreamPage.tsx b/frontend/src/pages/StreamPage.tsx index a35f5da09..beea3352b 100644 --- a/frontend/src/pages/StreamPage.tsx +++ b/frontend/src/pages/StreamPage.tsx @@ -588,7 +588,7 @@ export function StreamPage() { }); }; - const handleQuantizationChange = (quantization: "fp8_e4m3fn" | null) => { + const handleQuantizationChange = (quantization: "fp8_e4m3fn" | "nvfp4" | null) => { updateSettings({ quantization }); // Note: This setting requires pipeline reload, so we don't send parameter update here }; diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts index 35d01686d..ac0074f3c 100644 --- a/frontend/src/types/index.ts +++ b/frontend/src/types/index.ts @@ -55,7 +55,7 @@ export interface SettingsState { noiseScale?: number; noiseController?: boolean; manageCache?: boolean; - quantization?: "fp8_e4m3fn" | null; + quantization?: "fp8_e4m3fn" | "nvfp4" | null; kvCacheAttentionBias?: number; paused?: boolean; loras?: LoRAConfig[]; diff --git a/src/scope/core/pipelines/enums.py b/src/scope/core/pipelines/enums.py index 9de333da1..ca33e4a34 100644 --- a/src/scope/core/pipelines/enums.py +++ b/src/scope/core/pipelines/enums.py @@ -12,6 +12,7 @@ class Quantization(str, Enum): """Quantization method enumeration.""" FP8_E4M3FN = "fp8_e4m3fn" + NVFP4 = "nvfp4" class VaeType(str, Enum): diff --git a/src/scope/core/pipelines/krea_realtime_video/pipeline.py b/src/scope/core/pipelines/krea_realtime_video/pipeline.py index c411db2c9..603db1cee 100644 --- a/src/scope/core/pipelines/krea_realtime_video/pipeline.py +++ b/src/scope/core/pipelines/krea_realtime_video/pipeline.py @@ -15,6 +15,7 @@ ) from ..interface import Pipeline, Requirements from ..process import postprocess_chunk +from ..quantization_utils import apply_quantization from ..utils import Quantization, load_model_config, validate_resolution from ..wan2_1.components import WanDiffusionWrapper, WanTextEncoderWrapper from ..wan2_1.lora.mixin import LoRAEnabledPipeline @@ -111,29 +112,7 @@ def __init__( # Initialize optional LoRA adapters on the underlying model AFTER VACE. generator.model = self._init_loras(config, generator.model) - if quantization == Quantization.FP8_E4M3FN: - # Cast before optional quantization - generator = generator.to(dtype=dtype) - - start = time.time() - - from torchao.quantization.quant_api import ( - Float8DynamicActivationFloat8WeightConfig, - PerTensor, - quantize_, - ) - - # Move to target device during quantization - # Defaults to using fp8_e4m3fn for both weights and activations - quantize_( - generator, - Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()), - device=device, - ) - - print(f"Quantized diffusion model to fp8 in {time.time() - start:.3f}s") - else: - generator = generator.to(device=device, dtype=dtype) + generator = apply_quantization(generator, quantization, device, dtype) if compile: # Only compile the attention blocks diff --git a/src/scope/core/pipelines/longlive/pipeline.py b/src/scope/core/pipelines/longlive/pipeline.py index f0b53b55d..dc2e80c9e 100644 --- a/src/scope/core/pipelines/longlive/pipeline.py +++ b/src/scope/core/pipelines/longlive/pipeline.py @@ -15,6 +15,7 @@ ) from ..interface import Pipeline, Requirements from ..process import postprocess_chunk +from ..quantization_utils import apply_quantization from ..utils import Quantization, load_model_config, validate_resolution from ..wan2_1.components import WanDiffusionWrapper, WanTextEncoderWrapper from ..wan2_1.lora.mixin import LoRAEnabledPipeline @@ -110,29 +111,7 @@ def __init__( # This is additive and does not replace the original LongLive performance LoRA. generator.model = self._init_loras(config, generator.model) - if quantization == Quantization.FP8_E4M3FN: - # Cast before optional quantization - generator = generator.to(dtype=dtype) - - start = time.time() - - from torchao.quantization.quant_api import ( - Float8DynamicActivationFloat8WeightConfig, - PerTensor, - quantize_, - ) - - # Move to target device during quantization - # Defaults to using fp8_e4m3fn for both weights and activations - quantize_( - generator, - Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()), - device=device, - ) - - print(f"Quantized diffusion model to fp8 in {time.time() - start:.3f}s") - else: - generator = generator.to(device=device, dtype=dtype) + generator = apply_quantization(generator, quantization, device, dtype) start = time.time() text_encoder = WanTextEncoderWrapper( diff --git a/src/scope/core/pipelines/memflow/pipeline.py b/src/scope/core/pipelines/memflow/pipeline.py index e4e5b9d78..5f1efb90a 100644 --- a/src/scope/core/pipelines/memflow/pipeline.py +++ b/src/scope/core/pipelines/memflow/pipeline.py @@ -15,6 +15,7 @@ ) from ..interface import Pipeline, Requirements from ..process import postprocess_chunk +from ..quantization_utils import apply_quantization from ..utils import Quantization, load_model_config, validate_resolution from ..wan2_1.components import WanDiffusionWrapper, WanTextEncoderWrapper from ..wan2_1.lora.mixin import LoRAEnabledPipeline @@ -110,29 +111,7 @@ def __init__( # This is additive and does not replace the original MemFlow performance LoRA. generator.model = self._init_loras(config, generator.model) - if quantization == Quantization.FP8_E4M3FN: - # Cast before optional quantization - generator = generator.to(dtype=dtype) - - start = time.time() - - from torchao.quantization.quant_api import ( - Float8DynamicActivationFloat8WeightConfig, - PerTensor, - quantize_, - ) - - # Move to target device during quantization - # Defaults to using fp8_e4m3fn for both weights and activations - quantize_( - generator, - Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()), - device=device, - ) - - print(f"Quantized diffusion model to fp8 in {time.time() - start:.3f}s") - else: - generator = generator.to(device=device, dtype=dtype) + generator = apply_quantization(generator, quantization, device, dtype) start = time.time() text_encoder = WanTextEncoderWrapper( diff --git a/src/scope/core/pipelines/quantization_utils.py b/src/scope/core/pipelines/quantization_utils.py new file mode 100644 index 000000000..be40ddcfe --- /dev/null +++ b/src/scope/core/pipelines/quantization_utils.py @@ -0,0 +1,474 @@ +"""Quantization utilities for pipeline models. + +Provides shared quantization functions used across all pipelines that support +quantization (FP8 via torchao, NVFP4 via comfy-kitchen). + +NVFP4 (E2M1) provides ~4x weight memory reduction on Blackwell GPUs (SM >= 10.0) +using comfy-kitchen's QuantizedTensor and optimized CUDA kernels. + +FP8 (E4M3FN) provides ~2x weight memory reduction on Ada+ GPUs (SM >= 8.9) +using torchao's dynamic activation quantization. +""" + +from __future__ import annotations + +import gc +import logging +import time +from collections.abc import Callable + +import torch + +from .enums import Quantization + +logger = logging.getLogger(__name__) + +# ============================================================================ +# NVFP4 Support +# ============================================================================ + +# Minimum SM version for NVFP4 hardware acceleration +MIN_NVFP4_SM_VERSION = (10, 0) # Blackwell + +# Layout name for comfy-kitchen's NVFP4 layout +NVFP4_LAYOUT = "TensorCoreNVFP4Layout" + + +def check_nvfp4_support() -> tuple[bool, str]: + """Check if NVFP4 is supported on current hardware. + + Returns: + Tuple of (is_supported, reason_if_not) + """ + if not torch.cuda.is_available(): + return False, "CUDA not available" + + cap = torch.cuda.get_device_capability() + if cap < MIN_NVFP4_SM_VERSION: + return ( + False, + f"Requires SM >= {MIN_NVFP4_SM_VERSION[0]}.{MIN_NVFP4_SM_VERSION[1]} (Blackwell), " + f"current: SM {cap[0]}.{cap[1]}", + ) + + # Check if comfy-kitchen is available + try: + import comfy_kitchen # noqa: F401 + except ImportError: + return ( + False, + "comfy-kitchen package not installed. Install with: pip install comfy-kitchen[cublas]", + ) + + # Check if QuantizedTensor and NVFP4 layout are available + try: + from comfy_kitchen.tensor import ( # noqa: F401 + QuantizedTensor, + TensorCoreNVFP4Layout, + ) + except ImportError: + return False, "comfy-kitchen QuantizedTensor not available" + + return True, "" + + +class NVFP4Linear(torch.nn.Module): + """Linear layer with NVFP4 quantized weights using comfy-kitchen. + + Stores weights as comfy-kitchen QuantizedTensor which automatically + dispatches to optimized NVFP4 kernels during matmul. + + The weight is stored as an nn.Parameter containing a QuantizedTensor, + enabling the __torch_dispatch__ mechanism to route F.linear calls + to optimized NVFP4 kernels. + """ + + def __init__( + self, + in_features: int, + out_features: int, + bias: bool = True, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ): + super().__init__() + self.in_features = in_features + self.out_features = out_features + self._orig_dtype = dtype or torch.bfloat16 + self._layout_type = NVFP4_LAYOUT + + self.register_parameter("weight", None) + + if bias: + self.bias = torch.nn.Parameter( + torch.zeros(out_features, device=device, dtype=dtype or torch.bfloat16) + ) + else: + self.register_parameter("bias", None) + + @classmethod + def from_linear(cls, linear: torch.nn.Linear) -> NVFP4Linear: + """Create NVFP4Linear from a standard Linear layer. + + Note: Does NOT free the original linear layer's memory. + The caller is responsible for cleanup after this returns. + """ + from comfy_kitchen.tensor import QuantizedTensor + + in_features = linear.in_features + out_features = linear.out_features + has_bias = linear.bias is not None + device = linear.weight.device + dtype = linear.weight.dtype + + nvfp4_linear = cls( + in_features=in_features, + out_features=out_features, + bias=has_bias, + device=device, + dtype=dtype, + ) + + weight_2d = linear.weight.data.detach() + quantized_weight = QuantizedTensor.from_float(weight_2d, NVFP4_LAYOUT) + nvfp4_linear.weight = torch.nn.Parameter(quantized_weight, requires_grad=False) + + if has_bias: + nvfp4_linear.bias = torch.nn.Parameter( + linear.bias.data.detach().clone().to(dtype), requires_grad=False + ) + + return nvfp4_linear + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Forward pass with NVFP4 quantized computation.""" + from comfy_kitchen.tensor import QuantizedTensor + + orig_shape = x.shape + reshaped_3d = x.dim() == 3 + + if reshaped_3d: + x = x.reshape(-1, orig_shape[2]) + + if x.dim() == 2: + x_qt = QuantizedTensor.from_float(x, self._layout_type) + out = torch.nn.functional.linear(x_qt, self.weight, self.bias) + else: + weight_dq = ( + self.weight.dequantize() + if hasattr(self.weight, "dequantize") + else self.weight + ) + out = torch.nn.functional.linear(x, weight_dq, self.bias) + + if reshaped_3d: + out = out.reshape(orig_shape[0], orig_shape[1], self.weight.shape[0]) + + return out + + def extra_repr(self) -> str: + return f"in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}" + + +def _default_layer_filter(name: str, module: torch.nn.Module) -> bool: + """Default filter for selecting transformer block linear layers for quantization. + + Quantizes attention projections and MLP/FFN layers. + Excludes embedding layers, layer norms, output projections, and LoRA adapters. + """ + if not isinstance(module, torch.nn.Linear): + return False + + name_lower = name.lower() + + # Skip LoRA adapter layers + name_parts = name.split(".") + is_lora_layer = any( + part.lower().startswith("lora_") or part in ("lora_A", "lora_B") + for part in name_parts + ) + if is_lora_layer: + return False + + # Skip embedding, output, and input projection layers + skip_patterns = [ + "embed", + "lm_head", + "output_proj", + "final", + "norm", + "ln_", + "layernorm", + "patchify", + "caption_projection", + ] + for pattern in skip_patterns: + if pattern in name_lower: + return False + + # Include attention and MLP layers + include_patterns = [ + "attn", + "attention", + "q_proj", + "k_proj", + "v_proj", + "o_proj", + "out_proj", + "qkv", + "mlp", + "ffn", + "fc1", + "fc2", + "gate", + "up_proj", + "down_proj", + "dense", + "linear", + "proj", + ] + + for pattern in include_patterns: + if pattern in name_lower: + return True + + # Default: quantize layers inside transformer blocks + block_patterns = ["block", "layer", "transformer"] + for pattern in block_patterns: + if pattern in name_lower: + return True + + return False + + +def quantize_model_nvfp4( + model: torch.nn.Module, + layer_filter: Callable[[str, torch.nn.Module], bool] | None = None, + streaming: bool = False, + target_device: torch.device | None = None, +) -> None: + """Quantize Linear layers in a model to NVFP4 in-place. + + Replaces nn.Linear layers with NVFP4Linear layers for ~4x weight memory + reduction and hardware-accelerated matmul on Blackwell GPUs. + + Args: + model: PyTorch model to quantize + layer_filter: Optional function (name, module) -> bool to filter layers. + If None, uses _default_layer_filter. + streaming: If True, use streaming mode for low-VRAM GPUs. + target_device: Target device for quantization (only used in streaming mode). + """ + if layer_filter is None: + layer_filter = _default_layer_filter + + # Only store layer names to avoid keeping module references alive + layer_names_to_replace: list[str] = [] + skipped_lora: list[str] = [] + + for name, module in model.named_modules(): + if isinstance(module, torch.nn.Linear): + name_parts = name.split(".") + is_lora_layer = any( + part.startswith("lora_") or part in ("lora_A", "lora_B") + for part in name_parts + ) + + if is_lora_layer: + skipped_lora.append(name) + continue + + if layer_filter(name, module): + layer_names_to_replace.append(name) + + if skipped_lora: + logger.info(f"Skipped {len(skipped_lora)} LoRA adapter layers") + + num_layers = len(layer_names_to_replace) + logger.info(f"Quantizing {num_layers} Linear layers to NVFP4") + + if streaming and target_device is None: + target_device = ( + torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") + ) + + if streaming: + logger.info( + f"Using streaming quantization mode (target device: {target_device})" + ) + + # Log memory before quantization + mem_before = 0.0 + if torch.cuda.is_available(): + gc.collect() + torch.cuda.empty_cache() + torch.cuda.synchronize() + mem_before = torch.cuda.memory_allocated() / 1024**3 + logger.info(f"GPU memory before NVFP4 quantization: {mem_before:.2f} GB") + + for i, name in enumerate(layer_names_to_replace): + parts = name.split(".") + parent = model + for part in parts[:-1]: + parent = getattr(parent, part) + + module = getattr(parent, parts[-1]) + + if not isinstance(module, torch.nn.Linear): + continue + + if streaming: + original_device = module.weight.device + if original_device != target_device: + module = module.to(target_device) + setattr(parent, parts[-1], module) + + nvfp4_module = NVFP4Linear.from_linear(module) + nvfp4_module = nvfp4_module.to("cpu") + setattr(parent, parts[-1], nvfp4_module) + + if module.weight is not None: + module.weight.data = torch.empty(0, device="cpu", dtype=torch.float32) + if module.bias is not None: + module.bias.data = torch.empty(0, device="cpu", dtype=torch.float32) + del module + + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + else: + nvfp4_module = NVFP4Linear.from_linear(module) + setattr(parent, parts[-1], nvfp4_module) + + if module.weight is not None: + module.weight.data = torch.empty(0, device="cpu", dtype=torch.float32) + if module.bias is not None: + module.bias.data = torch.empty(0, device="cpu", dtype=torch.float32) + del module + + if (i + 1) % 25 == 0: + gc.collect() + torch.cuda.empty_cache() + + if (i + 1) % 100 == 0 or (streaming and (i + 1) % 50 == 0): + if torch.cuda.is_available(): + current_mem = torch.cuda.memory_allocated() / 1024**3 + logger.info( + f"Quantized {i + 1}/{num_layers} layers, " + f"GPU memory: {current_mem:.2f} GB" + ) + + gc.collect() + torch.cuda.empty_cache() + if torch.cuda.is_available(): + torch.cuda.synchronize() + mem_after = torch.cuda.memory_allocated() / 1024**3 + mem_saved = mem_before - mem_after + if mem_saved > 0: + logger.info(f"NVFP4 quantization saved {mem_saved:.2f} GB GPU memory") + + +# ============================================================================ +# Unified Quantization API +# ============================================================================ + + +def apply_quantization( + model: torch.nn.Module, + quantization: Quantization | None, + device: torch.device | str, + dtype: torch.dtype, +) -> torch.nn.Module: + """Apply quantization to a model and move it to the target device. + + This is the shared entry point used by all pipelines that support quantization. + It handles both FP8 and NVFP4 quantization methods, falling back to a simple + device/dtype cast when quantization is None. + + Args: + model: The model to quantize (typically the diffusion generator) + quantization: Quantization method to apply, or None for no quantization + device: Target device + dtype: Target dtype (typically torch.bfloat16) + + Returns: + The quantized model on the target device + """ + if quantization == Quantization.FP8_E4M3FN: + # Cast before quantization + model = model.to(dtype=dtype) + + start = time.time() + + from torchao.quantization.quant_api import ( + Float8DynamicActivationFloat8WeightConfig, + PerTensor, + quantize_, + ) + + quantize_( + model, + Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()), + device=device, + ) + + print(f"Quantized diffusion model to fp8 in {time.time() - start:.3f}s") + + elif quantization == Quantization.NVFP4: + supported, reason = check_nvfp4_support() + if not supported: + raise RuntimeError(f"NVFP4 quantization not supported: {reason}") + + # Cast to dtype first, then move to device + model = model.to(dtype=dtype, device=device) + + start = time.time() + quantize_model_nvfp4(model, layer_filter=_default_layer_filter) + print(f"Quantized diffusion model to nvfp4 in {time.time() - start:.3f}s") + + else: + model = model.to(device=device, dtype=dtype) + + return model + + +def apply_quantization_to_module( + module: torch.nn.Module, + quantization: Quantization | None, + device: torch.device | str, + dtype: torch.dtype, +) -> None: + """Apply quantization to a specific module (e.g., VACE components). + + Unlike apply_quantization, this operates on sub-modules that are + already on the correct device and doesn't return the module. + + Args: + module: The module to quantize + quantization: Quantization method to apply + device: Target device + dtype: Target dtype + """ + if quantization is None: + return + + if quantization == Quantization.FP8_E4M3FN: + from torchao.quantization.quant_api import ( + Float8DynamicActivationFloat8WeightConfig, + PerTensor, + quantize_, + ) + + quantize_( + module, + Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()), + device=device, + ) + + elif quantization == Quantization.NVFP4: + supported, reason = check_nvfp4_support() + if not supported: + logger.warning(f"NVFP4 not supported for sub-module, skipping: {reason}") + return + + quantize_model_nvfp4(module, layer_filter=_default_layer_filter) diff --git a/src/scope/core/pipelines/reward_forcing/pipeline.py b/src/scope/core/pipelines/reward_forcing/pipeline.py index f36263f55..59983028d 100644 --- a/src/scope/core/pipelines/reward_forcing/pipeline.py +++ b/src/scope/core/pipelines/reward_forcing/pipeline.py @@ -15,6 +15,7 @@ ) from ..interface import Pipeline, Requirements from ..process import postprocess_chunk +from ..quantization_utils import apply_quantization from ..utils import Quantization, load_model_config, validate_resolution from ..wan2_1.components import WanDiffusionWrapper, WanTextEncoderWrapper from ..wan2_1.lora.mixin import LoRAEnabledPipeline @@ -84,29 +85,7 @@ def __init__( # Initialize any additional, user-configured LoRA adapters via shared manager. generator.model = self._init_loras(config, generator.model) - if quantization == Quantization.FP8_E4M3FN: - # Cast before optional quantization - generator = generator.to(dtype=dtype) - - start = time.time() - - from torchao.quantization.quant_api import ( - Float8DynamicActivationFloat8WeightConfig, - PerTensor, - quantize_, - ) - - # Move to target device during quantization - # Defaults to using fp8_e4m3fn for both weights and activations - quantize_( - generator, - Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()), - device=device, - ) - - print(f"Quantized diffusion model to fp8 in {time.time() - start:.3f}s") - else: - generator = generator.to(device=device, dtype=dtype) + generator = apply_quantization(generator, quantization, device, dtype) start = time.time() text_encoder = WanTextEncoderWrapper( diff --git a/src/scope/core/pipelines/streamdiffusionv2/pipeline.py b/src/scope/core/pipelines/streamdiffusionv2/pipeline.py index 5c4bfb46b..9d1e106a1 100644 --- a/src/scope/core/pipelines/streamdiffusionv2/pipeline.py +++ b/src/scope/core/pipelines/streamdiffusionv2/pipeline.py @@ -15,6 +15,7 @@ ) from ..interface import Pipeline, Requirements from ..process import postprocess_chunk +from ..quantization_utils import apply_quantization from ..utils import Quantization, load_model_config, validate_resolution from ..wan2_1.components import WanDiffusionWrapper, WanTextEncoderWrapper from ..wan2_1.lora.mixin import LoRAEnabledPipeline @@ -87,29 +88,7 @@ def __init__( # Initialize optional LoRA adapters on the underlying model. generator.model = self._init_loras(config, generator.model) - if quantization == Quantization.FP8_E4M3FN: - # Cast before optional quantization - generator = generator.to(dtype=dtype) - - start = time.time() - - from torchao.quantization.quant_api import ( - Float8DynamicActivationFloat8WeightConfig, - PerTensor, - quantize_, - ) - - # Move to target device during quantization - # Defaults to using fp8_e4m3fn for both weights and activations - quantize_( - generator, - Float8DynamicActivationFloat8WeightConfig(granularity=PerTensor()), - device=device, - ) - - print(f"Quantized diffusion model to fp8 in {time.time() - start:.3f}s") - else: - generator = generator.to(device=device, dtype=dtype) + generator = apply_quantization(generator, quantization, device, dtype) start = time.time() text_encoder = WanTextEncoderWrapper( diff --git a/src/scope/core/pipelines/utils.py b/src/scope/core/pipelines/utils.py index c05e5bd01..43483bf52 100644 --- a/src/scope/core/pipelines/utils.py +++ b/src/scope/core/pipelines/utils.py @@ -10,6 +10,9 @@ from .enums import Quantization as Quantization # noqa: PLC0414 from .enums import VaeType as VaeType # noqa: PLC0414 +# Re-export quantization utilities +from .quantization_utils import apply_quantization as apply_quantization # noqa: PLC0414 + def load_state_dict(weights_path: str) -> dict: """Load weights with automatic format detection.""" diff --git a/src/scope/core/pipelines/wan2_1/vace/mixin.py b/src/scope/core/pipelines/wan2_1/vace/mixin.py index d984ba023..d080a1ce3 100644 --- a/src/scope/core/pipelines/wan2_1/vace/mixin.py +++ b/src/scope/core/pipelines/wan2_1/vace/mixin.py @@ -144,42 +144,33 @@ def _init_vace( # Quantize VACE components if quantization is enabled if quantization is not None: - # Import here to avoid circular dependency try: - from ...utils import Quantization - - if quantization == Quantization.FP8_E4M3FN: - logger.info( - "_init_vace: Quantizing VACE components to FP8 (matching base model)..." - ) - start = time.time() - - from torchao.quantization.quant_api import ( - Float8DynamicActivationFloat8WeightConfig, - PerTensor, - quantize_, - ) - - quantize_( - vace_wrapped_model.vace_patch_embedding, - Float8DynamicActivationFloat8WeightConfig( - granularity=PerTensor() - ), - device=device, - ) - quantize_( - vace_wrapped_model.vace_blocks, - Float8DynamicActivationFloat8WeightConfig( - granularity=PerTensor() - ), - device=device, - ) - logger.info( - f"_init_vace: Quantized VACE to FP8 in {time.time() - start:.3f}s" - ) + from ...quantization_utils import apply_quantization_to_module + + logger.info( + f"_init_vace: Quantizing VACE components with {quantization}..." + ) + start = time.time() + + apply_quantization_to_module( + vace_wrapped_model.vace_patch_embedding, + quantization, + device, + dtype, + ) + apply_quantization_to_module( + vace_wrapped_model.vace_blocks, + quantization, + device, + dtype, + ) + + logger.info( + f"_init_vace: Quantized VACE components in {time.time() - start:.3f}s" + ) except ImportError: logger.warning( - "_init_vace: Could not import Quantization, skipping quantization check" + "_init_vace: Could not import quantization_utils, skipping quantization" ) self.vace_enabled = True From f59fa9355a6367b683b293c2201b88b4f54aed5b Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 10 Feb 2026 20:01:44 -0500 Subject: [PATCH 2/5] Added dependencies required for nvfp4 and audio for future proof. Signed-off-by: BuffMcBigHuge --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 987fa935d..01ba73f9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "uvicorn>=0.35.0", "torch==2.9.1", "torchvision==0.24.1", + "torchaudio==2.9.1", "easydict>=1.13", "diffusers>=0.31.0", "ftfy>=6.3.1", @@ -57,6 +58,7 @@ dependencies = [ "triton-windows==3.5.1.post24; sys_platform == 'win32'", "SpoutGL>=0.1.1; sys_platform == 'win32'", "PyOpenGL>=3.1.10; sys_platform == 'win32'", + "comfy-kitchen[cublas]>=0.1.0; sys_platform == 'linux' or sys_platform == 'win32'", ] [project.optional-dependencies] @@ -95,6 +97,9 @@ torch = [ torchvision = [ { index = "pytorch-cu128", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, ] +torchaudio = [ + { index = "pytorch-cu128", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] flash-attn = [ # Prebuilt Linux wheels from https://github.com/Dao-AILab/flash-attention { url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.9cxx11abiTRUE-cp312-cp312-linux_x86_64.whl", marker = "sys_platform == 'linux'" }, From 0fc457ff453915a7263d716a4a535e530d782e5e Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 10 Feb 2026 20:08:03 -0500 Subject: [PATCH 3/5] Built nvfp4 detection via hardware info. Signed-off-by: BuffMcBigHuge --- frontend/src/components/ComplexFields.tsx | 9 ++++++--- frontend/src/components/SettingsPanel.tsx | 12 +++++++++--- frontend/src/hooks/useStreamState.ts | 14 ++++++++++++++ frontend/src/lib/api.ts | 1 + frontend/src/pages/StreamPage.tsx | 2 ++ src/scope/server/app.py | 6 ++++++ src/scope/server/cloud_proxy.py | 1 + src/scope/server/schema.py | 4 ++++ 8 files changed, 43 insertions(+), 6 deletions(-) diff --git a/frontend/src/components/ComplexFields.tsx b/frontend/src/components/ComplexFields.tsx index 61860e571..566a29448 100644 --- a/frontend/src/components/ComplexFields.tsx +++ b/frontend/src/components/ComplexFields.tsx @@ -67,6 +67,7 @@ export interface SchemaComplexFieldContext { inputMode?: "text" | "video"; supportsNoiseControls?: boolean; supportsQuantization?: boolean; + supportsNvfp4?: boolean; supportsCacheManagement?: boolean; supportsKvCacheBias?: boolean; isStreaming?: boolean; @@ -497,9 +498,11 @@ export function SchemaComplexField({ fp8_e4m3fn (Dynamic) - - nvfp4 (Blackwell) - + {ctx.supportsNvfp4 && ( + + nvfp4 (Blackwell) + + )} diff --git a/frontend/src/components/SettingsPanel.tsx b/frontend/src/components/SettingsPanel.tsx index e58e7aa75..1fc293d2e 100644 --- a/frontend/src/components/SettingsPanel.tsx +++ b/frontend/src/components/SettingsPanel.tsx @@ -90,6 +90,8 @@ interface SettingsPanelProps { // Spout settings spoutSender?: SettingsState["spoutSender"]; onSpoutSenderChange?: (spoutSender: SettingsState["spoutSender"]) => void; + // Whether GPU supports NVFP4 quantization (Blackwell SM >= 10.0) + supportsNvfp4?: boolean; // Whether Spout is available (server-side detection for native Windows, not WSL) spoutAvailable?: boolean; // VACE settings @@ -143,6 +145,7 @@ export function SettingsPanel({ loraMergeStrategy = "permanent_merge", inputMode, supportsNoiseControls = false, + supportsNvfp4 = false, spoutSender, onSpoutSenderChange, spoutAvailable = false, @@ -525,6 +528,7 @@ export function SettingsPanel({ supportsNoiseControls, supportsQuantization: pipelines?.[pipelineId]?.supportsQuantization, + supportsNvfp4, supportsCacheManagement: pipelines?.[pipelineId]?.supportsCacheManagement, supportsKvCacheBias: pipelines?.[pipelineId]?.supportsKvCacheBias, @@ -964,9 +968,11 @@ export function SettingsPanel({ fp8_e4m3fn (Dynamic) - - nvfp4 (Blackwell) - + {supportsNvfp4 && ( + + nvfp4 (Blackwell) + + )} diff --git a/frontend/src/hooks/useStreamState.ts b/frontend/src/hooks/useStreamState.ts index d4e827005..3b6d8e0a2 100644 --- a/frontend/src/hooks/useStreamState.ts +++ b/frontend/src/hooks/useStreamState.ts @@ -334,6 +334,20 @@ export function useStreamState() { } }, [settings.pipelineId, hardwareInfo, pipelineSchemas]); + // Reset nvfp4 selection if GPU doesn't support it (e.g. from persisted state) + useEffect(() => { + if ( + hardwareInfo && + !hardwareInfo.supports_nvfp4 && + settings.quantization === "nvfp4" + ) { + setSettings(prev => ({ + ...prev, + quantization: "fp8_e4m3fn", + })); + } + }, [hardwareInfo, settings.quantization]); + // Set recommended VACE enabled state based on pipeline schema and available VRAM // VACE is enabled by default, but disabled if VRAM is below recommended_quantization_vram_threshold useEffect(() => { diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index fbea0bc9e..3f9cbce4c 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -206,6 +206,7 @@ export const downloadPipelineModels = async ( export interface HardwareInfoResponse { vram_gb: number | null; spout_available: boolean; + supports_nvfp4: boolean; } export const getHardwareInfo = async (): Promise => { diff --git a/frontend/src/pages/StreamPage.tsx b/frontend/src/pages/StreamPage.tsx index beea3352b..60b61765f 100644 --- a/frontend/src/pages/StreamPage.tsx +++ b/frontend/src/pages/StreamPage.tsx @@ -104,6 +104,7 @@ export function StreamPage() { getDefaults, supportsNoiseControls, spoutAvailable, + hardwareInfo, refreshPipelineSchemas, refreshHardwareInfo, } = useStreamState(); @@ -1532,6 +1533,7 @@ export function StreamPage() { loraMergeStrategy={settings.loraMergeStrategy ?? "permanent_merge"} inputMode={settings.inputMode} supportsNoiseControls={supportsNoiseControls(settings.pipelineId)} + supportsNvfp4={hardwareInfo?.supports_nvfp4 ?? false} spoutSender={settings.spoutSender} onSpoutSenderChange={handleSpoutSenderChange} spoutAvailable={spoutAvailable} diff --git a/src/scope/server/app.py b/src/scope/server/app.py index f14769ddc..bb2a61de5 100644 --- a/src/scope/server/app.py +++ b/src/scope/server/app.py @@ -1152,15 +1152,21 @@ async def get_hardware_info( import torch # Lazy import to avoid loading at CLI startup vram_gb = None + supports_nvfp4 = False if torch.cuda.is_available(): # Get total VRAM from the first GPU (in bytes), convert to GB _, total_mem = torch.cuda.mem_get_info(0) vram_gb = total_mem / (1024**3) + # Blackwell GPUs (SM >= 10.0) support NVFP4 quantization + cap = torch.cuda.get_device_capability() + supports_nvfp4 = cap >= (10, 0) + return HardwareInfoResponse( vram_gb=vram_gb, spout_available=is_spout_available(), + supports_nvfp4=supports_nvfp4, ) except HTTPException: raise diff --git a/src/scope/server/cloud_proxy.py b/src/scope/server/cloud_proxy.py index 2e1f4d623..90b085fc9 100644 --- a/src/scope/server/cloud_proxy.py +++ b/src/scope/server/cloud_proxy.py @@ -130,6 +130,7 @@ async def get_hardware_info_from_cloud( return HardwareInfoResponse( vram_gb=data.get("vram_gb"), spout_available=spout_available, + supports_nvfp4=data.get("supports_nvfp4", False), ) diff --git a/src/scope/server/schema.py b/src/scope/server/schema.py index e0b9a2a91..3950d2217 100644 --- a/src/scope/server/schema.py +++ b/src/scope/server/schema.py @@ -251,6 +251,10 @@ class HardwareInfoResponse(BaseModel): default=False, description="Whether Spout is available (Windows only, not WSL)", ) + supports_nvfp4: bool = Field( + default=False, + description="Whether GPU supports NVFP4 quantization (Blackwell SM >= 10.0)", + ) class PipelineStatusEnum(str, Enum): From ca83d1b46f06dd6d74c648d1b803cbc8268e3385 Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 10 Feb 2026 20:43:21 -0500 Subject: [PATCH 4/5] Linting. Signed-off-by: BuffMcBigHuge --- frontend/src/components/ComplexFields.tsx | 4 +- frontend/src/components/SettingsPanel.tsx | 8 +- frontend/src/pages/StreamPage.tsx | 4 +- uv.lock | 92 +++++++++++++++++++++++ 4 files changed, 101 insertions(+), 7 deletions(-) diff --git a/frontend/src/components/ComplexFields.tsx b/frontend/src/components/ComplexFields.tsx index 566a29448..53da45962 100644 --- a/frontend/src/components/ComplexFields.tsx +++ b/frontend/src/components/ComplexFields.tsx @@ -499,9 +499,7 @@ export function SchemaComplexField({ fp8_e4m3fn (Dynamic) {ctx.supportsNvfp4 && ( - - nvfp4 (Blackwell) - + nvfp4 (Blackwell) )} diff --git a/frontend/src/components/SettingsPanel.tsx b/frontend/src/components/SettingsPanel.tsx index 1fc293d2e..fc8f1c6b6 100644 --- a/frontend/src/components/SettingsPanel.tsx +++ b/frontend/src/components/SettingsPanel.tsx @@ -621,8 +621,8 @@ export function SettingsPanel({

- VACE is incompatible with quantization. Please - disable quantization to use VACE. + VACE is incompatible with quantization. Please disable + quantization to use VACE.

)} @@ -955,7 +955,9 @@ export function SettingsPanel({ value={quantization || "none"} onValueChange={value => { onQuantizationChange?.( - value === "none" ? null : (value as "fp8_e4m3fn" | "nvfp4") + value === "none" + ? null + : (value as "fp8_e4m3fn" | "nvfp4") ); }} disabled={isStreaming || vaceEnabled} diff --git a/frontend/src/pages/StreamPage.tsx b/frontend/src/pages/StreamPage.tsx index 60b61765f..b8e6f133a 100644 --- a/frontend/src/pages/StreamPage.tsx +++ b/frontend/src/pages/StreamPage.tsx @@ -589,7 +589,9 @@ export function StreamPage() { }); }; - const handleQuantizationChange = (quantization: "fp8_e4m3fn" | "nvfp4" | null) => { + const handleQuantizationChange = ( + quantization: "fp8_e4m3fn" | "nvfp4" | null + ) => { updateSettings({ quantization }); // Note: This setting requires pipeline reload, so we don't send parameter update here }; diff --git a/uv.lock b/uv.lock index cc6638621..5b4470e16 100644 --- a/uv.lock +++ b/uv.lock @@ -462,6 +462,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "comfy-kitchen" +version = "0.2.7" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/e1/9b6e7764f8dcd5cb9b9ae369e55660bf24b7f48825584521246e3bddf43e/comfy_kitchen-0.2.7-cp312-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4a168eb1fcdbb31707bb0e1226c6d44e1bd1b0a5ac1ac0a4d9c6eb7296b903ae", size = 680629, upload-time = "2026-01-17T03:48:13.922Z" }, + { url = "https://files.pythonhosted.org/packages/b5/6b/1cea270d5014a465929375c434c2f78a35fadde5dfb6f436864e4c8f7a52/comfy_kitchen-0.2.7-cp312-abi3-win_amd64.whl", hash = "sha256:047b9ac7c8c1a845a51b0de3fb05c8d007666d68a3e776e07ecb5db21f15fbdd", size = 592877, upload-time = "2026-01-17T03:48:15.262Z" }, + { url = "https://files.pythonhosted.org/packages/f8/65/d483613734d0b9753bd9bfa297ff334cb2c7766e82306099db6b259b4e2c/comfy_kitchen-0.2.7-py3-none-any.whl", hash = "sha256:f8faa579b69d331d2f1eac09e96a95586c2a6b958a54bc19e7f1c1a77852dd36", size = 58034, upload-time = "2026-01-17T03:48:16.561Z" }, +] + +[package.optional-dependencies] +cublas = [ + { name = "nvidia-cublas", marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, +] + [[package]] name = "cryptography" version = "46.0.3" @@ -527,6 +542,7 @@ dependencies = [ { name = "aiohttp" }, { name = "aiortc" }, { name = "click" }, + { name = "comfy-kitchen", extra = ["cublas"], marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, { name = "diffusers" }, { name = "easydict" }, { name = "einops" }, @@ -549,6 +565,9 @@ dependencies = [ { name = "torch", version = "2.9.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" }, { name = "torch", version = "2.9.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, { name = "torchao" }, + { name = "torchaudio", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'" }, + { name = "torchaudio", version = "2.9.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" }, + { name = "torchaudio", version = "2.9.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(python_full_version >= '3.15' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux') or sys_platform == 'win32'" }, { name = "torchvision", version = "0.24.1", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'" }, { name = "torchvision", version = "0.24.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" }, { name = "torchvision", version = "0.24.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(python_full_version >= '3.15' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux') or sys_platform == 'win32'" }, @@ -582,6 +601,7 @@ requires-dist = [ { name = "aiokafka", marker = "extra == 'kafka'", specifier = ">=0.10.0" }, { name = "aiortc", specifier = ">=1.13.0" }, { name = "click", specifier = ">=8.3.1" }, + { name = "comfy-kitchen", extras = ["cublas"], marker = "sys_platform == 'linux' or sys_platform == 'win32'", specifier = ">=0.1.0" }, { name = "diffusers", specifier = ">=0.31.0" }, { name = "easydict", specifier = ">=1.13" }, { name = "einops", specifier = ">=0.8.1" }, @@ -604,6 +624,8 @@ requires-dist = [ { name = "torch", marker = "sys_platform != 'linux' and sys_platform != 'win32'", specifier = "==2.9.1" }, { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'win32'", specifier = "==2.9.1", index = "https://download.pytorch.org/whl/cu128" }, { name = "torchao", specifier = "==0.15.0" }, + { name = "torchaudio", marker = "sys_platform != 'linux' and sys_platform != 'win32'", specifier = "==2.9.1" }, + { name = "torchaudio", marker = "sys_platform == 'linux' or sys_platform == 'win32'", specifier = "==2.9.1", index = "https://download.pytorch.org/whl/cu128" }, { name = "torchvision", marker = "sys_platform != 'linux' and sys_platform != 'win32'", specifier = "==0.24.1" }, { name = "torchvision", marker = "sys_platform == 'linux' or sys_platform == 'win32'", specifier = "==0.24.1", index = "https://download.pytorch.org/whl/cu128" }, { name = "transformers", specifier = ">=4.49.0" }, @@ -1505,6 +1527,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121, upload-time = "2026-01-10T06:44:41.644Z" }, ] +[[package]] +name = "nvidia-cublas" +version = "13.2.1.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/36/0124129e1378e9834e0cbe19781fbe0ffd5f870c2af6f01cdf17a9869c39/nvidia_cublas-13.2.1.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:8b4a4cd8b73772fde9ccaa1f3967eb001ae5fde8b1dc37f7442d072b64d6f5da", size = 502470979, upload-time = "2026-01-13T22:39:37.619Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e7/39e43c0688f9788c88da0b91ea18125448c5f515104aadf65a70243f144f/nvidia_cublas-13.2.1.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8c13c93cf8be4480b4909905c96d2d31575b4af43fcd3af0e84af94762665e4f", size = 401085577, upload-time = "2026-01-13T22:40:18.702Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9b/d9788b63872c6e4ce0fb292f2000642e73a8ae4da2d6f6b33759b77059af/nvidia_cublas-13.2.1.1-py3-none-win_amd64.whl", hash = "sha256:bc94f0597c21cfd6fea9446b18309b2351630ff227bb4e8575196494fb51c6b6", size = 385519499, upload-time = "2026-01-13T22:57:28.499Z" }, +] + [[package]] name = "nvidia-cublas-cu12" version = "12.8.4.1" @@ -2558,6 +2590,66 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f6/3b/6b9d5618720f63dbc2e2509cd6b57aae9c0d61b738d1d2172f4d5d9efaab/torchao-0.15.0-py3-none-any.whl", hash = "sha256:3f3812676048ef8a2a0e9d492d12d8971ba7a7ebb16f54aa56f690414e130d2c", size = 1080679, upload-time = "2025-12-18T23:14:43.807Z" }, ] +[[package]] +name = "torchaudio" +version = "2.9.1" +source = { registry = "https://download.pytorch.org/whl/cu128" } +resolution-markers = [ + "python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'", +] +dependencies = [ + { name = "torch", version = "2.9.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:59f70f6aa6a7e77a1fd51756d7d25fec22bead0b50ce7bed4ede75a5fa6b21d1" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ea6fe3b9525493df0a5eb5eed5c22925065b5830f6999980ed76bb36c4592d34" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:9d3cfd604a617a245d26a5381ad7d669047ac1c152896227d8a006aad12151f8" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:bb790f968539f6b4115e637dbf3aab71f3f92e41f0c12e6bc7d52324f0051113" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:7d8da8816dfa25869da206eb4cdddb11603b042ab59326c6466a65b6e64d2684" }, +] + +[[package]] +name = "torchaudio" +version = "2.9.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "sys_platform != 'linux' and sys_platform != 'win32'", +] +dependencies = [ + { name = "torch", version = "2.9.1", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/83/71cbadd7b66753818b5775f2088bad4f721d581de276996df4968000a626/torchaudio-2.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7581ef170794c599aed55918e00d0acd9e5c9a0f19400c9a9a840955180365c5", size = 808098, upload-time = "2025-11-12T15:26:01.408Z" }, + { url = "https://files.pythonhosted.org/packages/c0/1b/3321ad6379ac2d968064704e8d015c31ccae5d1ece070f87fb44b17d90e6/torchaudio-2.9.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:bb69557484c92513a980027ec4cb314b0f43cf4442bbfd97440e66528dbad22d", size = 808136, upload-time = "2025-11-12T15:26:00.276Z" }, + { url = "https://files.pythonhosted.org/packages/0c/58/e82d8b5f447abdddc950965f1395f36baef3602643dd069100c6369ba73e/torchaudio-2.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9290f6a6409deb1f9113d5aef97ec646eeee6410b6bcc57ab8b57066b54da7c1", size = 813456, upload-time = "2025-11-12T15:26:13.963Z" }, + { url = "https://files.pythonhosted.org/packages/5b/38/0dabf362f946ab5773d3db3322718d652d70ad12a82f500d54c6c8b9cc88/torchaudio-2.9.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:69a582650279ee16ff9087f99b4234fe5d766e1bf7f0be352db5f46991854c1e", size = 810496, upload-time = "2025-11-12T15:26:11.515Z" }, + { url = "https://files.pythonhosted.org/packages/9c/f6/237e00a04dea497a40a8567d024dfb39193abec3ca3695ad51919ad633d1/torchaudio-2.9.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e13cb38971ac259fc4e102282a3e48f6df5f0ab00eb785ca5155e3392d1e86f1", size = 813463, upload-time = "2025-11-12T15:26:16.261Z" }, +] + +[[package]] +name = "torchaudio" +version = "2.9.1+cu128" +source = { registry = "https://download.pytorch.org/whl/cu128" } +resolution-markers = [ + "(python_full_version >= '3.15' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux')", + "sys_platform == 'win32'", +] +dependencies = [ + { name = "torch", version = "2.9.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(python_full_version >= '3.15' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux') or sys_platform == 'win32'" }, +] +wheels = [ + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:54eb19e634b8c567886a1b53b4184506d943c3ba5139198e9fe1b941bc566f30" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:88896c7bfa486102439fab6c85ac834176617e9c06eb0be9074c07ee1183b47d" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:52297b7dfb7c42e311385572bc9c0186e602ea1a5f20c42923765baea99aff83" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:abb0ee5a40c883ad17d90cdd45965c06deef42a0a2ffc58c51e32729642292f0" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ddc7410908858693d3b81346f53b5e5e51f987b3b7128978be6c774314377204" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:9d29dc3a2e0c43da66d33bcb9e22ad58c58f0ae1b6dcfe2d8d94bda279ddcf89" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1%2Bcu128-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:12c3e3d3aaf856d679328a5a9d46d866bc88b4c5290f2128f306abff975fa51e" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1%2Bcu128-cp314-cp314-win_amd64.whl", hash = "sha256:eb6c714557c8d47f4fc65ec58b14a21cb4150940c242fe77e7517636c20ed3c3" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1%2Bcu128-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6d9f5d53861b2fc057c1dd5051721f60b2253176c44a856d0f19100e312add3f" }, + { url = "https://download.pytorch.org/whl/cu128/torchaudio-2.9.1%2Bcu128-cp314-cp314t-win_amd64.whl", hash = "sha256:150a8d7d51df9f667b5386cff5850f685b6059c59db51e056d7157955aad9e75" }, +] + [[package]] name = "torchvision" version = "0.24.1" From 202b03148a14f5579a34ab6f8000e97f68fdc1d5 Mon Sep 17 00:00:00 2001 From: BuffMcBigHuge Date: Tue, 10 Feb 2026 20:45:41 -0500 Subject: [PATCH 5/5] Linting. Signed-off-by: BuffMcBigHuge --- src/scope/core/pipelines/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/scope/core/pipelines/utils.py b/src/scope/core/pipelines/utils.py index 43483bf52..3d2f022be 100644 --- a/src/scope/core/pipelines/utils.py +++ b/src/scope/core/pipelines/utils.py @@ -11,7 +11,9 @@ from .enums import VaeType as VaeType # noqa: PLC0414 # Re-export quantization utilities -from .quantization_utils import apply_quantization as apply_quantization # noqa: PLC0414 +from .quantization_utils import ( + apply_quantization as apply_quantization, # noqa: PLC0414 +) def load_state_dict(weights_path: str) -> dict: