@@ -38,7 +38,7 @@ class DDIMSchedulerOutput(BaseOutput):
         prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
             Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
             denoising loop.
-        pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images, *optional*):
+        pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
             The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
             `pred_original_sample` can be used to preview progress or for guidance.
     """
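For orientation, the sketch below shows how the two fields documented above are typically consumed in a denoising loop. It is a minimal, hypothetical example and not part of this change; the random tensors stand in for a real UNet prediction and latent.

```python
import torch
from diffusers import DDIMScheduler

scheduler = DDIMScheduler(num_train_timesteps=1000)
scheduler.set_timesteps(50)

sample = torch.randn(1, 3, 64, 64)           # placeholder latent
for t in scheduler.timesteps:
    model_output = torch.randn_like(sample)  # placeholder for a UNet noise prediction
    out = scheduler.step(model_output, t, sample)
    sample = out.prev_sample                 # x_{t-1}: used as the next model input
    preview = out.pred_original_sample       # x_0 estimate: preview progress or apply guidance
```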
@@ -49,36 +49,36 @@ class DDIMSchedulerOutput(BaseOutput):
 
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
 def betas_for_alpha_bar(
-    num_diffusion_timesteps: int,
-    max_beta: float = 0.999,
-    alpha_transform_type: Literal["cosine", "exp"] = "cosine",
-) -> torch.Tensor:
+    num_diffusion_timesteps,
+    max_beta=0.999,
+    alpha_transform_type="cosine",
+):
     """
     Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
     (1-beta) over time from t = [0,1].
 
     Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
     to that part of the diffusion process.
 
+
     Args:
-        num_diffusion_timesteps (`int`):
-            The number of betas to produce.
-        max_beta (`float`, defaults to 0.999):
-            The maximum beta to use; use values lower than 1 to avoid numerical instability.
-        alpha_transform_type (`Literal["cosine", "exp"]`, defaults to `"cosine"`):
-            The type of noise schedule for `alpha_bar`. Must be one of `"cosine"` or `"exp"`.
+        num_diffusion_timesteps (`int`): the number of betas to produce.
+        max_beta (`float`): the maximum beta to use; use values lower than 1 to
+            prevent singularities.
+        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
+            Choose from `cosine` or `exp`
 
     Returns:
-        `torch.Tensor`: The betas used by the scheduler to step the model outputs.
+        betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
     """
     if alpha_transform_type == "cosine":
 
-        def alpha_bar_fn(t: float) -> float:
+        def alpha_bar_fn(t):
             return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
 
     elif alpha_transform_type == "exp":
 
-        def alpha_bar_fn(t: float) -> float:
+        def alpha_bar_fn(t):
             return math.exp(t * -12.0)
 
     else:
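The hunk above only shows the `alpha_bar_fn` definitions. The standalone sketch below (hypothetical helper name `cosine_betas`, not part of this diff) illustrates how such a schedule is typically turned into discrete betas by keeping the cumulative product of `(1 - beta)` consistent with `alpha_bar`.

```python
import math

import torch


def cosine_betas(num_diffusion_timesteps: int, max_beta: float = 0.999) -> torch.Tensor:
    # alpha_bar(t): cumulative product of (1 - beta) as a continuous function of t in [0, 1]
    def alpha_bar_fn(t):
        return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2

    betas = []
    for i in range(num_diffusion_timesteps):
        t1 = i / num_diffusion_timesteps
        t2 = (i + 1) / num_diffusion_timesteps
        # choose beta_i so that prod(1 - beta) tracks alpha_bar, capped at max_beta for stability
        betas.append(min(1 - alpha_bar_fn(t2) / alpha_bar_fn(t1), max_beta))
    return torch.tensor(betas, dtype=torch.float32)


betas = cosine_betas(1000)  # 1000 increasing betas, each at most max_beta
```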
@@ -281,21 +281,13 @@ def _get_variance(self, timestep: int, prev_timestep: int) -> torch.Tensor:
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
     def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
         """
-        Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
+        "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
         prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
         s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
         pixels from saturation at each step. We find that dynamic thresholding results in significantly better
-        photorealism as well as better image-text alignment, especially when using very large guidance weights.
-
-        See https://huggingface.co/papers/2205.11487
+        photorealism as well as better image-text alignment, especially when using very large guidance weights."
 
-        Args:
-            sample (`torch.Tensor`):
-                The sample to threshold.
-
-        Returns:
-            `torch.Tensor`:
-                The thresholded sample.
+        https://huggingface.co/papers/2205.11487
         """
         dtype = sample.dtype
         batch_size, channels, *remaining_dims = sample.shape
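A minimal sketch of the dynamic thresholding rule quoted in the docstring, assuming a batch of image-shaped `x_0` predictions and a percentile `ratio`; the scheduler's actual `_threshold_sample` additionally handles dtype upcasting and arbitrary trailing dimensions.

```python
import torch


def dynamic_threshold(x0_pred: torch.Tensor, ratio: float = 0.995) -> torch.Tensor:
    batch_size = x0_pred.shape[0]
    flat = x0_pred.reshape(batch_size, -1)
    s = torch.quantile(flat.abs(), ratio, dim=1)      # per-sample percentile of |x_0|
    s = torch.clamp(s, min=1.0).view(batch_size, 1)   # if s <= 1 the thresholding is a no-op
    flat = torch.clamp(flat, -s, s) / s               # clip to [-s, s], then rescale into [-1, 1]
    return flat.reshape(x0_pred.shape)


thresholded = dynamic_threshold(torch.randn(2, 3, 64, 64) * 3)  # values pushed back into [-1, 1]
```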
@@ -509,24 +501,6 @@ def add_noise(
         noise: torch.Tensor,
         timesteps: torch.IntTensor,
     ) -> torch.Tensor:
-        """
-        Add noise to the original samples according to the noise magnitude at each timestep.
-
-        This implements the forward diffusion process using the formula: `noisy_sample = sqrt(alpha_prod) *
-        original_sample + sqrt(1 - alpha_prod) * noise`
-
-        Args:
-            original_samples (`torch.Tensor`):
-                The original clean samples to which noise will be added.
-            noise (`torch.Tensor`):
-                The noise tensor to add, typically sampled from a Gaussian distribution.
-            timesteps (`torch.IntTensor`):
-                The timesteps indicating the noise level from the diffusion schedule.
-
-        Returns:
-            `torch.Tensor`:
-                The noisy samples with noise added according to the timestep schedule.
-        """
         # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
         # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
         # for the subsequent add_noise calls
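The removed docstring spelled out the forward-diffusion formula `noisy_sample = sqrt(alpha_prod) * original_sample + sqrt(1 - alpha_prod) * noise`. The standalone sketch below (hypothetical helper name, not the scheduler's method) illustrates it, including the broadcasting of the per-timestep scalars over image dimensions.

```python
import torch


def add_noise_sketch(
    original_samples: torch.Tensor,
    noise: torch.Tensor,
    timesteps: torch.IntTensor,
    alphas_cumprod: torch.Tensor,
) -> torch.Tensor:
    alphas_cumprod = alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
    timesteps = timesteps.to(original_samples.device)
    sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
    sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
    # expand (batch,) -> (batch, 1, 1, ...) so the per-timestep scalars broadcast over channel/spatial dims
    while sqrt_alpha_prod.dim() < original_samples.dim():
        sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
        sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
    return sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
```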
@@ -549,27 +523,6 @@ def add_noise(
 
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
     def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
-        """
-        Compute the velocity prediction for v-prediction models.
-
-        The velocity is computed using the formula: `velocity = sqrt(alpha_prod) * noise - sqrt(1 - alpha_prod) *
-        sample`
-
-        This is used in v-prediction models where the model directly predicts the velocity instead of the noise or the
-        sample. See section 2.4 of [Imagen Video](https://huggingface.co/papers/2210.02303) paper.
-
-        Args:
-            sample (`torch.Tensor`):
-                The input sample (x_t) at the current timestep.
-            noise (`torch.Tensor`):
-                The noise tensor corresponding to the sample.
-            timesteps (`torch.IntTensor`):
-                The timesteps at which to compute the velocity.
-
-        Returns:
-            `torch.Tensor`:
-                The velocity prediction computed from the sample and noise at the given timesteps.
-        """
         # Make sure alphas_cumprod and timestep have same device and dtype as sample
         self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
         alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
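Similarly, the removed `get_velocity` docstring described the v-prediction target `velocity = sqrt(alpha_prod) * noise - sqrt(1 - alpha_prod) * sample` (Imagen Video, section 2.4). A matching standalone sketch, again with a hypothetical helper name:

```python
import torch


def velocity_sketch(
    sample: torch.Tensor,
    noise: torch.Tensor,
    timesteps: torch.IntTensor,
    alphas_cumprod: torch.Tensor,
) -> torch.Tensor:
    alphas_cumprod = alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
    timesteps = timesteps.to(sample.device)
    sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
    sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
    while sqrt_alpha_prod.dim() < sample.dim():
        sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
        sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)
    # v-prediction target: sqrt(alpha_bar_t) * noise - sqrt(1 - alpha_bar_t) * x_t
    return sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
```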