GGG-666
/

sd-scripts

Model card Files Files and versions

xet

Community

abc committed on Mar 24, 2023

Commit

0380cfd

1 Parent(s): 72f0b64

Upload 7 files

Browse files

Files changed (3) hide show

lycoris/kohya.py +37 -1
lycoris/locon.py +8 -19
lycoris/utils.py +72 -4

lycoris/kohya.py CHANGED Viewed

@@ -70,6 +70,12 @@ class LycorisNetwork(torch.nn.Module):
         "Downsample2D",
         "Upsample2D"
     ]
     TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"]
     LORA_PREFIX_UNET = 'lora_unet'
     LORA_PREFIX_TEXT_ENCODER = 'lora_te'
@@ -102,7 +108,12 @@ class LycorisNetwork(torch.nn.Module):
         self.dropout = dropout
         # create module instances
-        def create_modules(prefix, root_module: torch.nn.Module, target_replace_modules) -> List[network_module]:
             print('Create LyCORIS Module')
             loras = []
             for name, module in root_module.named_modules():
@@ -132,6 +143,31 @@ class LycorisNetwork(torch.nn.Module):
                         else:
                             continue
                         loras.append(lora)
             return loras
         self.text_encoder_loras = create_modules(

         "Downsample2D",
         "Upsample2D"
     ]
+    UNET_TARGET_REPLACE_NAME = [
+        "conv_in",
+        "conv_out",
+        "time_embedding.linear_1",
+        "time_embedding.linear_2",
+    ]
     TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"]
     LORA_PREFIX_UNET = 'lora_unet'
     LORA_PREFIX_TEXT_ENCODER = 'lora_te'
         self.dropout = dropout
         # create module instances
+        def create_modules(
+            prefix,
+            root_module: torch.nn.Module,
+            target_replace_modules,
+            target_replace_names = []
+        ) -> List[network_module]:
             print('Create LyCORIS Module')
             loras = []
             for name, module in root_module.named_modules():
                         else:
                             continue
                         loras.append(lora)
+                elif name in target_replace_names:
+                    lora_name = prefix + '.' + name
+                    lora_name = lora_name.replace('.', '_')
+                    if module.__class__.__name__ == 'Linear' and lora_dim>0:
+                        lora = network_module(
+                            lora_name, module, self.multiplier,
+                            self.lora_dim, self.alpha, self.dropout, use_cp
+                        )
+                    elif module.__class__.__name__ == 'Conv2d':
+                        k_size, *_ = module.kernel_size
+                        if k_size==1 and lora_dim>0:
+                            lora = network_module(
+                                lora_name, module, self.multiplier,
+                                self.lora_dim, self.alpha, self.dropout, use_cp
+                            )
+                        elif conv_lora_dim>0:
+                            lora = network_module(
+                                lora_name, module, self.multiplier,
+                                self.conv_lora_dim, self.conv_alpha, self.dropout, use_cp
+                            )
+                        else:
+                            continue
+                    else:
+                        continue
+                    loras.append(lora)
             return loras
         self.text_encoder_loras = create_modules(

lycoris/locon.py CHANGED Viewed

@@ -38,18 +38,11 @@ class LoConModule(nn.Module):
             else:
                 self.lora_down = nn.Conv2d(in_dim, lora_dim, k_size, stride, padding, bias=False)
             self.lora_up = nn.Conv2d(lora_dim, out_dim, (1, 1), bias=False)
-            self.op = F.conv2d
-            self.extra_args = {
-                'stride': stride,
-                'padding': padding
-            }
         else:
             in_dim = org_module.in_features
             out_dim = org_module.out_features
             self.lora_down = nn.Linear(in_dim, lora_dim, bias=False)
             self.lora_up = nn.Linear(lora_dim, out_dim, bias=False)
-            self.op = F.linear
-            self.extra_args = {}
         self.shape = org_module.weight.shape
         if dropout:
@@ -66,6 +59,8 @@ class LoConModule(nn.Module):
         # same as microsoft's
         torch.nn.init.kaiming_uniform_(self.lora_down.weight, a=math.sqrt(5))
         torch.nn.init.zeros_(self.lora_up.weight)
         self.multiplier = multiplier
         self.org_module = [org_module]
@@ -81,16 +76,10 @@ class LoConModule(nn.Module):
     def forward(self, x):
         if self.cp:
-            return self.dropout(
-                self.org_forward(x)
-                + self.lora_up(self.lora_mid(self.lora_down(x)))
-            ) * self.multiplier * self.scale
         else:
-            bias = None if self.org_module[0].bias is None else self.org_module[0].bias.data
-            return self.op(
-                x,
-                (self.org_module[0].weight.data
-                + self.dropout(self.make_weight()) * self.multiplier * self.scale),
-                bias,
-                **self.extra_args,
-            )

             else:
                 self.lora_down = nn.Conv2d(in_dim, lora_dim, k_size, stride, padding, bias=False)
             self.lora_up = nn.Conv2d(lora_dim, out_dim, (1, 1), bias=False)
         else:
             in_dim = org_module.in_features
             out_dim = org_module.out_features
             self.lora_down = nn.Linear(in_dim, lora_dim, bias=False)
             self.lora_up = nn.Linear(lora_dim, out_dim, bias=False)
         self.shape = org_module.weight.shape
         if dropout:
         # same as microsoft's
         torch.nn.init.kaiming_uniform_(self.lora_down.weight, a=math.sqrt(5))
         torch.nn.init.zeros_(self.lora_up.weight)
+        if self.cp:
+            torch.nn.init.kaiming_uniform_(self.lora_mid.weight, a=math.sqrt(5))
         self.multiplier = multiplier
         self.org_module = [org_module]
     def forward(self, x):
         if self.cp:
+            return self.org_forward(x)  + self.dropout(
+                self.lora_up(self.lora_mid(self.lora_down(x)))* self.multiplier * self.scale
+            )
         else:
+            return self.org_forward(x)  + self.dropout(
+                self.lora_up(self.lora_down(x))* self.multiplier * self.scale
+            )

lycoris/utils.py CHANGED Viewed

@@ -164,6 +164,12 @@ def extract_diff(
         "Downsample2D",
         "Upsample2D"
     ]
     TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"]
     LORA_PREFIX_UNET = 'lora_unet'
     LORA_PREFIX_TEXT_ENCODER = 'lora_te'
@@ -171,10 +177,12 @@ def extract_diff(
         prefix,
         root_module: torch.nn.Module,
         target_module: torch.nn.Module,
-        target_replace_modules
     ):
         loras = {}
         temp = {}
         for name, module in root_module.named_modules():
             if module.__class__.__name__ in target_replace_modules:
@@ -183,6 +191,8 @@ def extract_diff(
                     if child_module.__class__.__name__ not in {'Linear', 'Conv2d'}:
                         continue
                     temp[name][child_name] = child_module.weight
         for name, module in tqdm(list(target_module.named_modules())):
             if name in temp:
@@ -221,7 +231,7 @@ def extract_diff(
                             diff = child_module.weight - torch.einsum(
                                 'i j k l, j r, p i -> p r k l',
                                 extract_c, extract_a.flatten(1, -1), extract_b.flatten(1, -1)
-                            )
                             del extract_c
                     else:
                         continue
@@ -231,7 +241,7 @@ def extract_diff(
                     if use_bias:
                         diff = diff.detach().cpu().reshape(extract_b.size(0), -1)
-                        sparse_diff = make_sparse(diff, sparsity).to_sparse()
                         indices = sparse_diff.indices().to(torch.int16)
                         values = sparse_diff.values().half()
@@ -239,6 +249,63 @@ def extract_diff(
                         loras[f'{lora_name}.bias_values'] = values
                         loras[f'{lora_name}.bias_size'] = torch.tensor(diff.shape).to(torch.int16)
                     del extract_a, extract_b, diff
         return loras
     text_encoder_loras = make_state_dict(
@@ -250,7 +317,8 @@ def extract_diff(
     unet_loras = make_state_dict(
         LORA_PREFIX_UNET,
         base_model[2], db_model[2],
-        UNET_TARGET_REPLACE_MODULE
     )
     print(len(text_encoder_loras), len(unet_loras))
     return text_encoder_loras|unet_loras

         "Downsample2D",
         "Upsample2D"
     ]
+    UNET_TARGET_REPLACE_NAME = [
+        "conv_in",
+        "conv_out",
+        "time_embedding.linear_1",
+        "time_embedding.linear_2",
+    ]
     TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"]
     LORA_PREFIX_UNET = 'lora_unet'
     LORA_PREFIX_TEXT_ENCODER = 'lora_te'
         prefix,
         root_module: torch.nn.Module,
         target_module: torch.nn.Module,
+        target_replace_modules,
+        target_replace_names = []
     ):
         loras = {}
         temp = {}
+        temp_name = {}
         for name, module in root_module.named_modules():
             if module.__class__.__name__ in target_replace_modules:
                     if child_module.__class__.__name__ not in {'Linear', 'Conv2d'}:
                         continue
                     temp[name][child_name] = child_module.weight
+            elif name in target_replace_names:
+                temp_name[name] = module.weight
         for name, module in tqdm(list(target_module.named_modules())):
             if name in temp:
                             diff = child_module.weight - torch.einsum(
                                 'i j k l, j r, p i -> p r k l',
                                 extract_c, extract_a.flatten(1, -1), extract_b.flatten(1, -1)
+                            ).detach().cpu().contiguous()
                             del extract_c
                     else:
                         continue
                     if use_bias:
                         diff = diff.detach().cpu().reshape(extract_b.size(0), -1)
+                        sparse_diff = make_sparse(diff, sparsity).to_sparse().coalesce()
                         indices = sparse_diff.indices().to(torch.int16)
                         values = sparse_diff.values().half()
                         loras[f'{lora_name}.bias_values'] = values
                         loras[f'{lora_name}.bias_size'] = torch.tensor(diff.shape).to(torch.int16)
                     del extract_a, extract_b, diff
+            elif name in temp_name:
+                weight = temp_name[name]
+                lora_name = prefix + '.' + name
+                lora_name = lora_name.replace('.', '_')
+                if weight.size(0)<32 or weight.size(1)<32:
+                    loras[f'{lora_name}.diff'] = module.weight - weight
+                    continue
+                layer = module.__class__.__name__
+                if layer == 'Linear':
+                    extract_a, extract_b, diff = extract_linear(
+                        (module.weight - weight),
+                        mode,
+                        linear_mode_param,
+                        device = extract_device,
+                    )
+                elif layer == 'Conv2d':
+                    is_linear = (module.weight.shape[2] == 1
+                                and module.weight.shape[3] == 1)
+                    extract_a, extract_b, diff = extract_conv(
+                        (module.weight - weight),
+                        mode,
+                        linear_mode_param if is_linear else conv_mode_param,
+                        device = extract_device,
+                    )
+                    if small_conv and not is_linear:
+                        dim = extract_a.size(0)
+                        extract_c, extract_a, _ = extract_conv(
+                            extract_a.transpose(0, 1),
+                            'fixed', dim,
+                            extract_device
+                        )
+                        extract_a = extract_a.transpose(0, 1)
+                        extract_c = extract_c.transpose(0, 1)
+                        loras[f'{lora_name}.lora_mid.weight'] = extract_c.detach().cpu().contiguous().half()
+                        diff = module.weight - torch.einsum(
+                            'i j k l, j r, p i -> p r k l',
+                            extract_c, extract_a.flatten(1, -1), extract_b.flatten(1, -1)
+                        ).detach().cpu().contiguous()
+                        del extract_c
+                else:
+                    continue
+                loras[f'{lora_name}.lora_down.weight'] = extract_a.detach().cpu().contiguous().half()
+                loras[f'{lora_name}.lora_up.weight'] = extract_b.detach().cpu().contiguous().half()
+                loras[f'{lora_name}.alpha'] = torch.Tensor([extract_a.shape[0]]).half()
+                if use_bias:
+                    diff = diff.detach().cpu().reshape(extract_b.size(0), -1)
+                    sparse_diff = make_sparse(diff, sparsity).to_sparse().coalesce()
+                    indices = sparse_diff.indices().to(torch.int16)
+                    values = sparse_diff.values().half()
+                    loras[f'{lora_name}.bias_indices'] = indices
+                    loras[f'{lora_name}.bias_values'] = values
+                    loras[f'{lora_name}.bias_size'] = torch.tensor(diff.shape).to(torch.int16)
+                del extract_a, extract_b, diff
         return loras
     text_encoder_loras = make_state_dict(
     unet_loras = make_state_dict(
         LORA_PREFIX_UNET,
         base_model[2], db_model[2],
+        UNET_TARGET_REPLACE_MODULE,
+        UNET_TARGET_REPLACE_NAME
     )
     print(len(text_encoder_loras), len(unet_loras))
     return text_encoder_loras|unet_loras