-from typing import Dict
+from typing import Dict, Tuple

import torch

from invokeai.backend.patches.layers.lokr_layer import LoKRLayer
from invokeai.backend.patches.layers.lora_layer import LoRALayer
from invokeai.backend.patches.layers.norm_layer import NormLayer
-from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer


def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseLayerPatch:
@@ -36,8 +35,70 @@ def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseL
    raise ValueError(f"Unsupported lora format: {state_dict.keys()}")


-def diffusers_adaLN_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> DiffusersAdaLN_LoRALayer:
+
+def swap_shift_scale_for_linear_weight(weight: torch.Tensor) -> torch.Tensor:
+    """Swap the shift and scale halves of a linear layer's weight (the swap is its own inverse)."""
+    # The SD3 and Flux implementations of AdaLayerNormContinuous split the linear projection output into
+    # (shift, scale), while diffusers splits it into (scale, shift). This flips the two halves around.
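+    # e.g. for a weight of shape (2 * dim, in_features), rows [0:dim] and rows [dim:2*dim] trade places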
+    chunk1, chunk2 = weight.chunk(2, dim=0)
+    return torch.cat([chunk2, chunk1], dim=0)
+
+def decomposite_weight_matric_with_rank(
+    delta: torch.Tensor,
+    rank: int,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """Decompose the given matrix into a low-rank (up, down) factor pair of the specified rank via truncated SVD."""
+    U, S, V = torch.svd(delta)
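+    # note: torch.svd returns V (so delta ~= U @ diag(S) @ V.T); the newer torch.linalg.svd returns Vh instead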
+
+    # Truncate to rank r:
+    U_r = U[:, :rank]
+    S_r = S[:rank]
+    V_r = V[:, :rank]
+
+    S_sqrt = torch.sqrt(S_r)
+
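+    # split sqrt(S_r) between the two factors so that up @ down == U_r @ diag(S_r) @ V_r.T,
+    # the best rank-r approximation of delta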
+    up = torch.matmul(U_r, torch.diag(S_sqrt))
+    down = torch.matmul(torch.diag(S_sqrt), V_r.T)
+
+    return up, down
+
+
+def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRALayer:
+    """Approximate a diffusers AdaLN LoRA layer as a LoRALayer for our Flux model."""
+
    if "lora_up.weight" not in state_dict:
-        raise ValueError(f"Unsupported lora format: {state_dict.keys()}")
+        raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_up")

-    return DiffusersAdaLN_LoRALayer.from_state_dict_values(state_dict)
+    if "lora_down.weight" not in state_dict:
+        raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_down")
+
+    up = state_dict.pop('lora_up.weight')
+    down = state_dict.pop('lora_down.weight')
+
+    dtype = up.dtype
+    device = up.device
+    up_shape = up.shape
+    down_shape = down.shape
+
+    # desired low rank
+    rank = up_shape[1]
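+    # (lora_up has shape [out_features, rank], so its second dimension is the original LoRA rank)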
+
+    # upcast to double precision for a more accurate SVD
+    up = up.double()
+    down = down.double()
+    weight = up.reshape(up.shape[0], -1) @ down.reshape(down.shape[0], -1)
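+    # the full weight delta encoded by the diffusers LoRA pair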
+
+    # swap the two halves to match our (shift, scale) linear layout
+    swapped = swap_shift_scale_for_linear_weight(weight)
+
+    _up, _down = decomposite_weight_matric_with_rank(swapped, rank)
+
+    assert _up.shape == up_shape
+    assert _down.shape == down_shape
+
+    # cast back to the original dtype and device
+    state_dict['lora_up.weight'] = _up.to(dtype).to(device=device)
+    state_dict['lora_down.weight'] = _down.to(dtype).to(device=device)
+
+    return LoRALayer.from_state_dict_values(state_dict)
+