diff --git a/invokeai/backend/patches/layers/utils.py b/invokeai/backend/patches/layers/utils.py
index 8141a56644a..778884cacc1 100644
--- a/invokeai/backend/patches/layers/utils.py
+++ b/invokeai/backend/patches/layers/utils.py
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Dict, Tuple
 
 import torch
 
@@ -33,3 +33,32 @@ def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseL
        return NormLayer.from_state_dict_values(state_dict)
    else:
        raise ValueError(f"Unsupported lora format: {state_dict.keys()}")
+
+
+def swap_shift_scale_for_linear_weight(weight: torch.Tensor) -> torch.Tensor:
+    """Swap the shift and scale halves of the given linear layer weight (applying it twice restores the original)."""
+    # The SD3 and Flux implementations of AdaLayerNormContinuous split the linear projection output into
+    # (shift, scale), while diffusers splits it into (scale, shift). This flips the two halves around.
+    chunk1, chunk2 = weight.chunk(2, dim=0)
+    return torch.cat([chunk2, chunk1], dim=0)
+
+
+def decomposite_weight_matric_with_rank(
+    delta: torch.Tensor,
+    rank: int,
+    epsilon: float = 1e-8,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    """Decompose the given matrix into two low-rank factors of the specified rank via SVD."""
+    U, S, V = torch.svd(delta)
+
+    # Truncate to rank r:
+    U_r = U[:, :rank]
+    S_r = S[:rank]
+    V_r = V[:, :rank]
+
+    S_sqrt = torch.sqrt(S_r + epsilon)  # regularization
+
+    up = torch.matmul(U_r, torch.diag(S_sqrt))
+    down = torch.matmul(torch.diag(S_sqrt), V_r.T)
+
+    return up, down
diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
index 6a36db7b592..71f4cbecdde 100644
--- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
+++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
@@ -3,8 +3,13 @@
 import torch
 
 from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch
+from invokeai.backend.patches.layers.lora_layer import LoRALayer
 from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range
-from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict
+from invokeai.backend.patches.layers.utils import (
+    any_lora_layer_from_state_dict,
+    decomposite_weight_matric_with_rank,
+    swap_shift_scale_for_linear_weight,
+)
 from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
 from invokeai.backend.patches.model_patch_raw import ModelPatchRaw
 
@@ -30,6 +35,50 @@ def is_state_dict_likely_in_flux_diffusers_format(state_dict: Dict[str, torch.Te
     return all_keys_in_peft_format and all_expected_keys_present
 
+
+def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRALayer:
+    """Approximate the given diffusers AdaLN LoRA layer as an equivalent LoRA layer for our Flux model."""
+
+    if "lora_up.weight" not in state_dict:
+        raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_up")
+
+    if "lora_down.weight" not in state_dict:
+        raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_down")
+
+    up = state_dict.pop("lora_up.weight")
+    down = state_dict.pop("lora_down.weight")
+
+    # The layer patcher upcasts everything to float32 anyway;
+    # keep this layer in float32 to maintain better precision.
+    dtype = torch.float32
+
+    device = up.device
+    up_shape = up.shape
+    down_shape = down.shape
+
+    # desired low rank
+    rank = up_shape[1]
+
+    # upcast to float32 for better numerical precision
+    up = up.to(torch.float32)
+    down = down.to(torch.float32)
+
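+    # reconstruct the full weight delta from the low-rank up/down factors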
+    weight = up.reshape(up_shape[0], -1) @ down.reshape(down_shape[0], -1)
+
+    # swap from diffusers' (scale, shift) ordering to our (shift, scale) linear format
+    swapped = swap_shift_scale_for_linear_weight(weight)
+
+    _up, _down = decomposite_weight_matric_with_rank(swapped, rank)
+
+    assert _up.shape == up_shape
+    assert _down.shape == down_shape
+
+    # cast back to the target dtype and original device
+    state_dict["lora_up.weight"] = _up.to(dtype).to(device=device)
+    state_dict["lora_down.weight"] = _down.to(dtype).to(device=device)
+
+    return LoRALayer.from_state_dict_values(state_dict)
+
+
 def lora_model_from_flux_diffusers_state_dict(
     state_dict: Dict[str, torch.Tensor], alpha: float | None
 ) -> ModelPatchRaw:
@@ -82,6 +131,12 @@ def add_lora_layer_if_present(src_key: str, dst_key: str) -> None:
            values = get_lora_layer_values(src_layer_dict)
            layers[dst_key] = any_lora_layer_from_state_dict(values)
 
+    def add_adaLN_lora_layer_if_present(src_key: str, dst_key: str) -> None:
+        if src_key in grouped_state_dict:
+            src_layer_dict = grouped_state_dict.pop(src_key)
+            values = get_lora_layer_values(src_layer_dict)
+            layers[dst_key] = approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(values)
+
     def add_qkv_lora_layer_if_present(
         src_keys: list[str],
         src_weight_shapes: list[tuple[int, int]],
@@ -124,8 +179,8 @@ def add_qkv_lora_layer_if_present(
     add_lora_layer_if_present("time_text_embed.text_embedder.linear_2", "vector_in.out_layer")
 
     # time_text_embed.guidance_embedder -> guidance_in.
-    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_1", "guidance_in")
-    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_2", "guidance_in")
+    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_1", "guidance_in.in_layer")
+    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_2", "guidance_in.out_layer")
 
     # context_embedder -> txt_in.
     add_lora_layer_if_present("context_embedder", "txt_in")
@@ -223,6 +278,10 @@ def add_qkv_lora_layer_if_present(
     # Final layer.
     add_lora_layer_if_present("proj_out", "final_layer.linear")
+    add_adaLN_lora_layer_if_present(
+        "norm_out.linear",
+        "final_layer.adaLN_modulation.1",
+    )
 
     # Assert that all keys were processed.
     assert len(grouped_state_dict) == 0
diff --git a/tests/backend/patches/layers/test_layer_utils.py b/tests/backend/patches/layers/test_layer_utils.py
new file mode 100644
index 00000000000..2383ec3bacf
--- /dev/null
+++ b/tests/backend/patches/layers/test_layer_utils.py
@@ -0,0 +1,48 @@
+import torch
+
+from invokeai.backend.patches.layers.utils import (
+    decomposite_weight_matric_with_rank,
+    swap_shift_scale_for_linear_weight,
+)
+
+
+def test_swap_shift_scale_for_linear_weight():
+    """Test that swapping the shift/scale halves of a weight works."""
+    original = torch.Tensor([1, 2])
+    expected = torch.Tensor([2, 1])
+
+    swapped = swap_shift_scale_for_linear_weight(original)
+    assert torch.allclose(expected, swapped)
+
+    size = (3, 4)
+    first = torch.randn(size)
+    second = torch.randn(size)
+
+    original = torch.concat([first, second])
+    expected = torch.concat([second, first])
+
+    swapped = swap_shift_scale_for_linear_weight(original)
+    assert torch.allclose(expected, swapped)
+
+    # calling this twice reconstructs the original
+    reconstructed = swap_shift_scale_for_linear_weight(swapped)
+    assert torch.allclose(reconstructed, original)
+
+
+def test_decomposite_weight_matric_with_rank():
+    """Test that decomposition of a given matrix into 2 low-rank matrices works."""
+    input_dim = 1024
+    output_dim = 1024
+    rank = 8  # Low rank
+
+    A = torch.randn(input_dim, rank).double()
+    B = torch.randn(rank, output_dim).double()
+    W0 = A @ B
+
+    C, D = decomposite_weight_matric_with_rank(W0, rank)
+    R = C @ D
+
+    assert C.shape == A.shape
+    assert D.shape == B.shape
+
+    assert torch.allclose(W0, R)
diff --git a/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py
new file mode 100644
index 00000000000..fd08ce8c3bf
--- /dev/null
+++ b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py
@@ -0,0 +1,1013 @@
+# Sample state dict in the Diffusers FLUX LoRA format.
+# This is from Hyper-SD, which has an extra `norm_out` layer +# From https://huggingface.co/ByteDance/Hyper-SD/tree/main?show_file_info=Hyper-FLUX.1-dev-16steps-lora.safetensors +state_dict_keys = { + "transformer.context_embedder.lora_A.weight": [64, 4096], + "transformer.context_embedder.lora_B.weight": [3072, 64], + "transformer.norm_out.linear.lora_A.weight": [64, 3072], + "transformer.norm_out.linear.lora_B.weight": [6144, 64], + "transformer.proj_out.lora_A.weight": [64, 3072], + "transformer.proj_out.lora_B.weight": [64, 64], + "transformer.single_transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.0.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.0.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.0.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.1.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.1.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.1.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.10.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.10.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.10.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.11.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.11.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.11.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.12.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.12.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.12.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.13.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.13.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.13.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.norm.linear.lora_B.weight": [9216, 64], + 
"transformer.single_transformer_blocks.14.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.14.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.14.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.15.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.15.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.15.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.16.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.16.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.16.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.17.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.17.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.17.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.18.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.18.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.18.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.19.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.19.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.19.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.2.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.2.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.2.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.20.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.proj_mlp.lora_B.weight": [12288, 64], + 
"transformer.single_transformer_blocks.20.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.20.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.21.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.21.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.21.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.22.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.22.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.22.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.23.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.23.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.23.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_v.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.24.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.24.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.24.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.24.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.25.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.25.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.25.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.26.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.26.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.26.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.27.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.27.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.27.proj_out.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.28.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.28.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.28.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.28.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.29.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.29.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.29.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.3.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.3.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.3.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.norm.linear.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.30.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.30.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.30.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.30.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.31.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.31.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.31.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.32.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.32.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.32.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.33.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.33.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.33.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_k.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.34.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.34.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.34.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.34.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.35.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.35.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.35.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.36.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.36.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.36.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.37.proj_mlp.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.37.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.37.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.37.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.4.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.4.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.4.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.5.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.5.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.5.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.6.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.6.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.6.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.7.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.7.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.7.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.8.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.8.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.8.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.9.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.9.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.9.proj_out.lora_B.weight": [3072, 64], + "transformer.time_text_embed.guidance_embedder.linear_1.lora_A.weight": [64, 256], + "transformer.time_text_embed.guidance_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.guidance_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.guidance_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.time_text_embed.text_embedder.linear_1.lora_A.weight": [64, 768], + "transformer.time_text_embed.text_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.text_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.text_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.time_text_embed.timestep_embedder.linear_1.lora_A.weight": [64, 256], + "transformer.time_text_embed.timestep_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.timestep_embedder.linear_2.lora_A.weight": [64, 3072], + 
"transformer.time_text_embed.timestep_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.0.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.0.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.0.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.0.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.0.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.1.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.1.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.1.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.1.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.1.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.1.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.1.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.10.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.10.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.10.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.10.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.10.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.10.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.11.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_q_proj.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.11.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.11.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.11.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.11.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.11.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.11.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.12.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.12.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.12.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_A.weight": [64, 
3072], + "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.12.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.12.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.12.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.13.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.13.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.13.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.13.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.13.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.13.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.14.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.14.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.14.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.14.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.14.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.14.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.15.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.15.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.15.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.15.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.15.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.norm1.linear.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.15.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.15.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.16.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.16.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.16.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.16.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.16.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.16.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.17.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.17.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.17.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.17.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.17.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.17.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.18.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.18.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.18.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.18.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.18.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.18.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.2.attn.add_k_proj.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.2.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.2.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.2.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.2.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.2.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.2.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.3.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.ff.net.0.proj.lora_B.weight": [12288, 64], + 
"transformer.transformer_blocks.3.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.3.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.3.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.3.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.3.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.4.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.4.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.4.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.4.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.4.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.4.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.5.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_v_proj.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.5.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.5.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.5.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.5.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.5.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.5.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.6.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.6.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.6.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.6.ff_context.net.2.lora_A.weight": [64, 12288], + 
"transformer.transformer_blocks.6.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.6.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.7.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.7.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.7.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.7.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.7.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.7.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.8.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_out.0.lora_B.weight": [3072, 64], 
+ "transformer.transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.8.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.8.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.8.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.8.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.8.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.9.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.9.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.9.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.9.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.9.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.9.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.x_embedder.lora_A.weight": [64, 64], + 
"transformer.x_embedder.lora_B.weight": [3072, 64], +} diff --git a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py index 2bdb883faff..2d30bd8a678 100644 --- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py +++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py @@ -1,7 +1,11 @@ +from unittest import mock + import pytest import torch +from invokeai.backend.patches.layers.utils import swap_shift_scale_for_linear_weight from invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils import ( + approximate_flux_adaLN_lora_layer_from_diffusers_state_dict, is_state_dict_likely_in_flux_diffusers_format, lora_model_from_flux_diffusers_state_dict, ) @@ -15,13 +19,23 @@ from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_no_proj_mlp_format import ( state_dict_keys as flux_diffusers_no_proj_mlp_state_dict_keys, ) +from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_with_norm_out_format import ( + state_dict_keys as flux_diffusers_with_norm_out_state_dict_keys, +) from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_kohya_format import ( state_dict_keys as flux_kohya_state_dict_keys, ) from tests.backend.patches.lora_conversions.lora_state_dicts.utils import keys_to_mock_state_dict -@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys]) +@pytest.mark.parametrize( + "sd_keys", + [ + flux_diffusers_state_dict_keys, + flux_diffusers_no_proj_mlp_state_dict_keys, + flux_diffusers_with_norm_out_state_dict_keys, + ], +) def test_is_state_dict_likely_in_flux_diffusers_format_true(sd_keys: dict[str, list[int]]): """Test that is_state_dict_likely_in_flux_diffusers_format() can identify a state dict in the Diffusers FLUX LoRA format.""" # Construct a state dict that is in the Diffusers FLUX LoRA format. @@ -41,7 +55,14 @@ def test_is_state_dict_likely_in_flux_diffusers_format_false(sd_keys: dict[str, assert not is_state_dict_likely_in_flux_diffusers_format(state_dict) -@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys]) +@pytest.mark.parametrize( + "sd_keys", + [ + flux_diffusers_state_dict_keys, + flux_diffusers_no_proj_mlp_state_dict_keys, + flux_diffusers_with_norm_out_state_dict_keys, + ], +) def test_lora_model_from_flux_diffusers_state_dict(sd_keys: dict[str, list[int]]): """Test that lora_model_from_flux_diffusers_state_dict() can load a state dict in the Diffusers FLUX LoRA format.""" # Construct a state dict that is in the Diffusers FLUX LoRA format. @@ -75,3 +96,78 @@ def test_lora_model_from_flux_diffusers_state_dict_extra_keys_error(): # Check that an error is raised. 
with pytest.raises(AssertionError): lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0) + + +@pytest.mark.parametrize( + "layer_sd_keys", + [ + {}, # no keys + {"lora_A.weight": [1024, 8], "lora_B.weight": [8, 512]}, # wrong keys + { + "lora_up.weight": [1024, 8], + }, # missing key + { + "lora_down.weight": [8, 512], + }, # missing key + ], +) +def test_approximate_adaLN_from_state_dict_should_only_accept_vanilla_LoRA_format(layer_sd_keys: dict[str, list[int]]): + """Should reject state dicts that are not in the vanilla lora_up/lora_down format.""" + layer_state_dict = keys_to_mock_state_dict(layer_sd_keys) + + with pytest.raises(ValueError): + approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(layer_state_dict) + + +@pytest.mark.parametrize( + "dtype, rtol", + [ + (torch.float32, 1e-4), + (torch.half, 1e-3), + ], +) +def test_approximate_adaLN_from_state_dict_should_work(dtype: torch.dtype, rtol: float, rate: float = 0.99): + """Test that the adaLN LoRA layer approximated from a diffusers state dict is close to the exact result. + The tolerated error depends on the input dtype.""" + input_dim = 1024 + output_dim = 512 + rank = 8 # Low rank + total = input_dim * output_dim + + up = torch.randn(input_dim, rank, dtype=dtype) + down = torch.randn(rank, output_dim, dtype=dtype) + + layer_state_dict = {"lora_up.weight": up, "lora_down.weight": down} + + # The layer patcher upcasts weights to float32, so compare in float32. + original = up.float() @ down.float() + swapped = swap_shift_scale_for_linear_weight(original) + + layer = approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(layer_state_dict) + weight = layer.get_weight(original).float() + + close_count = torch.isclose(weight, swapped, rtol=rtol).sum().item() + close_rate = close_count / total + + assert close_rate > rate + + +def test_adaLN_should_be_approximated_if_present_while_converting(): + """The adaLN layer should be approximated if it is present in the given state dict.""" + state_dict = keys_to_mock_state_dict(flux_diffusers_with_norm_out_state_dict_keys) + + adaLN_layer_key = "final_layer.adaLN_modulation.1" + prefixed_layer_key = FLUX_LORA_TRANSFORMER_PREFIX + adaLN_layer_key + + with mock.patch( + "invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils.approximate_flux_adaLN_lora_layer_from_diffusers_state_dict" + ) as mock_approximate_func: + model = lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0) + + # Check that every converted layer key carries the transformer prefix. + assert all(k.startswith(FLUX_LORA_TRANSFORMER_PREFIX) for k in model.layers.keys()) + + assert prefixed_layer_key in model.layers.keys() + assert mock_approximate_func.call_count == 1
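Reviewer note: the close-rate expectation in test_approximate_adaLN_from_state_dict_should_work rests on a standard linear-algebra fact: a truncated SVD at rank r reconstructs any matrix of rank at most r up to floating-point error, and swapping the shift/scale chunks is an exact row permutation, so the remaining approximation error comes almost entirely from dtype casting. The standalone sketch below (not part of the patch; plain torch with hypothetical dimensions) illustrates that fact.

    import torch

    # Minimal sketch: a rank-r truncated SVD reconstructs a rank-<=r matrix
    # almost exactly, which is why approximating the adaLN delta at the
    # LoRA's own rank should lose essentially nothing.
    rank = 8
    up = torch.randn(1024, rank)
    down = torch.randn(rank, 512)
    delta = up @ down  # rank <= 8 by construction

    U, S, Vh = torch.linalg.svd(delta, full_matrices=False)
    reconstructed = U[:, :rank] @ torch.diag(S[:rank]) @ Vh[:rank, :]

    # Relative Frobenius error should sit near float32 machine precision.
    rel_err = torch.linalg.norm(delta - reconstructed) / torch.linalg.norm(delta)
    assert rel_err < 1e-5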