k4d3 committed
Commit 2bb76e3 · Parent: cd52e87

only scale the alphas, dora only for boolean weight

Signed-off-by: Balazs Horvath <acsipont@gmail.com>
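
Context for the commit title: in a standard LoRA layer the effective weight update is ΔW = (alpha / rank) · (lora_up @ lora_down), so multiplying only the stored alpha by the per-block weight rescales the whole block contribution without touching the up/down matrices. The DoRA magnitude (dora_scale) does not factor out the same way, which is presumably why its interpolation toward 1 stays commented out below and, as the title suggests, DoRA tensors are only handled correctly for a block weight of exactly 0 or 1. A minimal numpy sketch of the alpha equivalence (array names and sizes are made up, not part of the script):

import numpy as np

rank, d_out, d_in = 4, 16, 16
down = np.random.randn(rank, d_in).astype(np.float32)  # lora_down.weight
up = np.random.randn(d_out, rank).astype(np.float32)   # lora_up.weight
alpha, block_weight = 4.0, 0.5

delta = (alpha / rank) * (up @ down)  # original block contribution
delta_alpha_scaled = ((alpha * block_weight) / rank) * (up @ down)

# Scaling only alpha is the same as scaling the whole block contribution.
assert np.allclose(delta_alpha_scaled, block_weight * delta)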

Files changed (1)
  1. chop_blocks +83 -17
chop_blocks CHANGED
@@ -6,6 +6,7 @@ import logging
 import re
 from collections import defaultdict
 from pathlib import Path
+import numpy as np
 
 from safetensors.numpy import safe_open, save_file
 
@@ -34,7 +35,7 @@ def analyze_lora_layers(
     block2keys: dict[tuple[str, int], set[str]] = defaultdict(set)
 
     for k in sft_fd.keys():
-        m = RE_LORA_NAME.fullmatch(k)
+        m = RE_LORA_NAME.fullmatch(k.replace("_0_1_transformer_blocks_", "_0_"))
         if not m:
             pass_through_keys.add(k)
             continue
@@ -50,10 +51,42 @@ def analyze_lora_layers(
         raise ValueError(
             "No UNet layers found in the LoRA checkpoint (Maybe not a SDXL model?)"
         )
-    block2keys_sorted = sorted(block2keys.items())
+    block2keys_sorted = sorted((k, sorted(v)) for k, v in block2keys.items())
+
+    for k in pass_through_keys:
+        if "te_" not in k and "text_" not in k:
+            logging.warning(
+                f"key {k} removed but it doesn't look like a text encoder layer"
+            )
+
+    def print_layers(layers):
+        for layer, params in layers.items():
+            params = ", ".join(sorted(params))
+            dbg(f" - {layer:<70}: {params}")
+
+    if logger.getEffectiveLevel() <= logging.DEBUG:
+        dbg = logger.debug
+        for (section, idx), keys in block2keys_sorted:
+            layers = groupby_layer(keys)
+            dbg(f"* {section=} {idx=} keys={len(keys)} layers={len(layers)}")
+            print_layers(layers)
+
+        logger.debug(" * Pass through: ")
+        print_layers(groupby_layer(pass_through_keys))
     return block2keys_sorted, pass_through_keys
 
 
+def groupby_layer(
+    keys, make_empty=set, update=lambda vs, layer_name, param_name: vs.add(param_name)
+):
+    d = defaultdict(make_empty)
+    for k in keys:
+        layer, _, param = k.rpartition(".")
+        vs = d[layer]
+        update(vs, layer, param)
+    return d
+
+
 def print_block_layout(
     block2keys: list[tuple[tuple[str, int], set[str]]],
     weights: list[float] | None = None,
@@ -67,8 +100,8 @@ def print_block_layout(
     """
     logger.info("Blocks layout:")
     if weights is None:
-        for i, ((section, idx), v) in enumerate(block2keys):
-            logger.info(f"\t[{i:>2d}] {section:>13}.{idx} layers={len(v):<3}")
+        for i, ((section, idx), keys) in enumerate(block2keys):
+            logger.info(f"\t[{i:>2d}] {section:>13}.{idx} layers={len(keys):<3}")
         section2shortname = {
             # SDXL names:
             "input_blocks": "INP",
@@ -86,17 +119,20 @@ def print_block_layout(
         vector_string = ",".join("0" * len(block2keys))
         logger.info(f'Example (drops all blocks): "1,{vector_string}"')
     else:
-        for i, (((section, idx), v), weight) in enumerate(zip(block2keys, weights)):
+        for i, (((section, idx), keys), weight) in enumerate(zip(block2keys, weights)):
             if abs(weight) > 1e-6:
                 if abs(weight - 1) < 1e-6:
                     weight = 1
-                logger.info(
-                    f"\t[{i:>2d}] {section:>13}.{idx} layers={len(v):<3} weight={weight}"
-                )
+                w_disp = f"weight={weight}"
             else:
-                logger.info(
-                    f"\t[{i:>2d}] {section:>13}.{idx} layers={len(v):<3} (removed)"
-                )
+                w_disp = "removed"
+
+            layers = len(
+                groupby_layer(keys, lambda: None, lambda _layers, _layer, _attr: None)
+            )
+            logger.info(
+                f"\t[{i:>2d}] {section:>13}.{idx} keys={len(keys):<3} layers={layers:<3} {w_disp}"
+            )
 
 
 def filter_blocks(sft_fd: safe_open, vector_string: str) -> dict[str, "numpy.ndarray"]:
@@ -122,15 +158,44 @@ def filter_blocks(sft_fd: safe_open, vector_string: str) -> dict[str, "numpy.nda
     print_block_layout(block2keys, weights_vector)
 
     state_dict = {}
-    for weight, (_, keys) in zip(weights_vector, block2keys):
+    for weight, ((s, idx), keys) in zip(weights_vector, block2keys):
         weight *= global_weight
         if abs(weight) < 1e-6:
+            logger.debug("reject %s:%s (%s)", s, idx, keys[0])
             continue
-        for k in keys:
-            tensor = sft_fd.get_tensor(k)
-            if abs(weight - 1.0) > 1e-6:
-                tensor *= weight
-            state_dict[k] = tensor
+
+        for layer, params in groupby_layer(keys).items():
+            logger.debug(
+                "accept %s:%s (%s) weight=%.2f params=%s",
+                s,
+                idx,
+                layer,
+                weight,
+                ",".join(params),
+            )
+
+            if "alpha" in params:
+                params.remove("alpha")
+                key = f"{layer}.alpha"
+                state_dict[key] = sft_fd.get_tensor(key) * weight
+                # if 'dora_scale' in params:
+                #     params.remove("dora_scale")
+                #     key = f"{layer}.dora_scale"
+                #     tensor = sft_fd.get_tensor(key)
+                #     if abs(weight - 1.0) > 1e-6:
+                #         tensor -= 1.0
+                #         tensor *= weight
+                #         tensor += 1.0
+                #     state_dict[key] = tensor
+
+                for param in params:
+                    key = f"{layer}.{param}"
+                    state_dict[key] = sft_fd.get_tensor(key)
+            else:
+                logging.warning("no alpha parameter in layer %s: %r", layer, params)
+                for param in params:
+                    key = f"{layer}.{param}"
+                    state_dict[key] = sft_fd.get_tensor(key)
 
     logger.info(
         "Keeping %d keys from the UNet, %d passing through (text encoders)",
@@ -190,6 +255,7 @@ def main() -> None:
     # Filter blocks and save the result
     filtered_state_dict = filter_blocks(sft_fd, args.vector_string)
     if filtered_state_dict is None:
+        logging.error("No layers in output!")
        exit(1)
 
     # Determine output path
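
As a quick sanity check on the new behavior, one can compare alphas between the input LoRA and the file chop_blocks writes. A small sketch, with placeholder file names, using the top-level safetensors.safe_open API rather than the script's safetensors.numpy import:

from safetensors import safe_open

# Placeholder paths; point these at the input LoRA and the chopped output.
with safe_open("lora_in.safetensors", framework="numpy") as src, safe_open(
    "lora_out.safetensors", framework="numpy"
) as dst:
    for k in dst.keys():
        if k.endswith(".alpha"):
            # Every kept key also exists in the source (the script only copies keys),
            # so the ratio recovers the weight that was applied; assumes non-zero alphas.
            ratio = float(dst.get_tensor(k)) / float(src.get_tensor(k))
            print(f"{k}: alpha scaled by {ratio:g}")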