Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -19,176 +19,6 @@ LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

-MANUAL_PATCHES_STORE = {"diff": {}, "diff_b": {}}
-
-def _custom_convert_non_diffusers_wan_lora_to_diffusers(state_dict):
-    global MANUAL_PATCHES_STORE
-    MANUAL_PATCHES_STORE = {"diff": {}, "diff_b": {}}  # Reset for each conversion
-    peft_compatible_state_dict = {}
-    unhandled_keys = []
-
-    original_keys_map_to_diffusers = {}
-
-    # Mapping based on ComfyUI's WanModel structure and PeftAdapterMixin logic
-    # This needs to map the original LoRA key naming to Diffusers' expected PEFT keys
-    # diffusion_model.blocks.0.self_attn.q.lora_down.weight -> transformer.blocks.0.attn1.to_q.lora_A.weight
-    # diffusion_model.blocks.0.ffn.0.lora_down.weight -> transformer.blocks.0.ffn.net.0.proj.lora_A.weight
-    # diffusion_model.text_embedding.0.lora_down.weight -> transformer.condition_embedder.text_embedder.linear_1.lora_A.weight (example)
-
-    # Strip "diffusion_model." and map
-    for k, v in state_dict.items():
-        original_k = k  # Keep for logging/debugging
-        if k.startswith("diffusion_model."):
-            k_stripped = k[len("diffusion_model."):]
-        elif k.startswith("difusion_model."):  # Handle potential typo
-            k_stripped = k[len("difusion_model."):]
-            logger.warning(f"Key '{original_k}' starts with 'difusion_model.' (potential typo), processing as 'diffusion_model.'.")
-        else:
-            unhandled_keys.append(original_k)
-            continue
-
-        # Handle .diff and .diff_b keys by storing them separately
-        if k_stripped.endswith(".diff"):
-            target_model_key = k_stripped[:-len(".diff")] + ".weight"
-            MANUAL_PATCHES_STORE["diff"][target_model_key] = v
-            continue
-        elif k_stripped.endswith(".diff_b"):
-            target_model_key = k_stripped[:-len(".diff_b")] + ".bias"
-            MANUAL_PATCHES_STORE["diff_b"][target_model_key] = v
-            continue
-
-        # Handle standard LoRA A/B matrices
-        if ".lora_down.weight" in k_stripped:
-            diffusers_key_base = k_stripped.replace(".lora_down.weight", "")
-            # Apply transformations similar to _convert_non_diffusers_wan_lora_to_diffusers from diffusers
-            # but adapt to the PEFT naming convention (lora_A/lora_B)
-            # This part needs careful mapping based on WanTransformer3DModel structure
-
-            # Example mappings (these need to be comprehensive for all layers)
-            if diffusers_key_base.startswith("blocks."):
-                parts = diffusers_key_base.split(".")
-                block_idx = parts[1]
-                attn_type = parts[2]  # self_attn or cross_attn
-                proj_type = parts[3]  # q, k, v, o
-
-                if attn_type == "self_attn":
-                    diffusers_peft_key = f"transformer.blocks.{block_idx}.attn1.to_{proj_type}.lora_A.weight"
-                elif attn_type == "cross_attn":
-                    # WanTransformer3DModel uses attn2 for cross-attention like features
-                    diffusers_peft_key = f"transformer.blocks.{block_idx}.attn2.to_{proj_type}.lora_A.weight"
-                else:  # ffn
-                    ffn_idx = proj_type  # "0" or "2"
-                    diffusers_peft_key = f"transformer.blocks.{block_idx}.ffn.net.{ffn_idx}.proj.lora_A.weight"
-            elif diffusers_key_base.startswith("text_embedding."):
-                idx_map = {"0": "linear_1", "2": "linear_2"}
-                idx = diffusers_key_base.split(".")[1]
-                diffusers_peft_key = f"transformer.condition_embedder.text_embedder.{idx_map[idx]}.lora_A.weight"
-            elif diffusers_key_base.startswith("time_embedding."):
-                idx_map = {"0": "linear_1", "2": "linear_2"}
-                idx = diffusers_key_base.split(".")[1]
-                diffusers_peft_key = f"transformer.condition_embedder.time_embedder.{idx_map[idx]}.lora_A.weight"
-            elif diffusers_key_base.startswith("time_projection."):  # Assuming '1' from your example
-                diffusers_peft_key = f"transformer.condition_embedder.time_proj.lora_A.weight"
-            elif diffusers_key_base.startswith("patch_embedding"):
-                # WanTransformer3DModel has 'patch_embedding' at the top level
-                diffusers_peft_key = f"transformer.patch_embedding.lora_A.weight"  # This needs to match how PEFT would name it
-            elif diffusers_key_base.startswith("head.head"):
-                diffusers_peft_key = f"transformer.proj_out.lora_A.weight"
-            else:
-                unhandled_keys.append(original_k)
-                continue
-
-            peft_compatible_state_dict[diffusers_peft_key] = v
-            original_keys_map_to_diffusers[k_stripped] = diffusers_peft_key
-
-        elif ".lora_up.weight" in k_stripped:
-            # Find the corresponding lora_down key to determine the base name
-            down_key_stripped = k_stripped.replace(".lora_up.weight", ".lora_down.weight")
-            if down_key_stripped in original_keys_map_to_diffusers:
-                diffusers_peft_key_A = original_keys_map_to_diffusers[down_key_stripped]
-                diffusers_peft_key_B = diffusers_peft_key_A.replace(".lora_A.weight", ".lora_B.weight")
-                peft_compatible_state_dict[diffusers_peft_key_B] = v
-            else:
-                unhandled_keys.append(original_k)
-        elif not (k_stripped.endswith(".alpha") or k_stripped.endswith(".dora_scale")):  # Alphas are handled by PEFT if lora_A/B present
-            unhandled_keys.append(original_k)
-
-
-    if unhandled_keys:
-        logger.warning(f"Custom Wan LoRA Converter: Unhandled keys: {unhandled_keys}")
-
-    return peft_compatible_state_dict
-
-
-def apply_manual_diff_patches(pipe_model, patches_store, lora_strength=1.0):
-    if not hasattr(pipe_model, "transformer"):
-        logger.error("Pipeline model does not have a 'transformer' attribute to patch.")
-        return
-
-    transformer = pipe_model.transformer
-    changed_params_count = 0
-
-    for key_base, diff_tensor in patches_store.get("diff", {}).items():
-        # key_base is like "blocks.0.self_attn.q.weight"
-        # We need to prepend "transformer." to match diffusers internal naming
-        target_key_full = f"transformer.{key_base}"
-        try:
-            module_path_parts = target_key_full.split('.')
-            param_name = module_path_parts[-1]
-            module_path = ".".join(module_path_parts[:-1])
-            module = transformer
-            for part in module_path.split('.')[1:]:  # Skip the first 'transformer'
-                module = getattr(module, part)
-
-            original_param = getattr(module, param_name)
-            if original_param.shape != diff_tensor.shape:
-                logger.warning(f"Shape mismatch for diff patch on {target_key_full}: model {original_param.shape}, lora {diff_tensor.shape}. Skipping.")
-                continue
-
-            with torch.no_grad():
-                scaled_diff = (lora_strength * diff_tensor.to(original_param.device, original_param.dtype))
-                original_param.data.add_(scaled_diff)
-            changed_params_count += 1
-        except AttributeError:
-            logger.warning(f"Could not find parameter {target_key_full} in transformer to apply diff patch.")
-        except Exception as e:
-            logger.error(f"Error applying diff patch to {target_key_full}: {e}")
-
-
-    for key_base, diff_b_tensor in patches_store.get("diff_b", {}).items():
-        # key_base is like "blocks.0.self_attn.q.bias"
-        target_key_full = f"transformer.{key_base}"
-        try:
-            module_path_parts = target_key_full.split('.')
-            param_name = module_path_parts[-1]
-            module_path = ".".join(module_path_parts[:-1])
-            module = transformer
-            for part in module_path.split('.')[1:]:
-                module = getattr(module, part)
-
-            original_param = getattr(module, param_name)
-            if original_param is None:
-                logger.warning(f"Bias parameter {target_key_full} is None in model. Skipping diff_b patch.")
-                continue
-
-            if original_param.shape != diff_b_tensor.shape:
-                logger.warning(f"Shape mismatch for diff_b patch on {target_key_full}: model {original_param.shape}, lora {diff_b_tensor.shape}. Skipping.")
-                continue
-
-            with torch.no_grad():
-                scaled_diff_b = (lora_strength * diff_b_tensor.to(original_param.device, original_param.dtype))
-                original_param.data.add_(scaled_diff_b)
-            changed_params_count += 1
-        except AttributeError:
-            logger.warning(f"Could not find parameter {target_key_full} in transformer to apply diff_b patch.")
-        except Exception as e:
-            logger.error(f"Error applying diff_b patch to {target_key_full}: {e}")
-    if changed_params_count > 0:
-        logger.info(f"Applied {changed_params_count} manual diff/diff_b patches.")
-    else:
-        logger.info("No manual diff/diff_b patches were applied.")
-
-
 # --- Model Loading ---
 logger.info(f"Loading VAE for {MODEL_ID}...")
 vae = AutoencoderKLWan.from_pretrained(
@@ -214,26 +44,7 @@ logger.info(f"Downloading LoRA {LORA_FILENAME} from {LORA_REPO_ID}...")
 causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)

 logger.info("Loading LoRA weights with custom converter...")
-
-from safetensors.torch import load_file as load_safetensors
-raw_lora_state_dict = load_safetensors(causvid_path)
-
-# Now call our custom converter which will populate MANUAL_PATCHES_STORE
-peft_state_dict = _custom_convert_non_diffusers_wan_lora_to_diffusers(raw_lora_state_dict)
-
-# Load the LoRA A/B matrices using PEFT
-if peft_state_dict:
-    pipe.load_lora_weights(
-        peft_state_dict,
-        adapter_name="causvid_lora"
-    )
-    logger.info("PEFT LoRA A/B weights loaded.")
-else:
-    logger.warning("No PEFT-compatible LoRA weights found after conversion.")
-
-# Apply manual diff_b and diff patches
-apply_manual_diff_patches(pipe, MANUAL_PATCHES_STORE, lora_strength=1.0)  # Assuming default strength 1.0
-logger.info("Manual diff_b/diff patches applied.")
+pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")


 # --- Gradio Interface Function ---
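After this change the Space relies on pipe.load_lora_weights to handle the Wan-style checkpoint directly from the downloaded .safetensors path. The sketch below shows the resulting loading path in isolation; it reuses pipe, LORA_REPO_ID, and LORA_FILENAME defined earlier in app.py, and the final set_adapters call is an assumption about the installed diffusers version (a way to control LoRA strength), not something this commit adds.

# Sketch of the simplified loading path after this commit. It assumes the
# installed diffusers release converts Wan (non-diffusers) LoRA keys internally
# when given a .safetensors file. `pipe`, LORA_REPO_ID and LORA_FILENAME come
# from earlier in app.py.
from huggingface_hub import hf_hub_download

causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)

# One call replaces the manual safetensors load, key conversion, and
# diff/diff_b patching that this commit removes.
pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")

# Assumed API: recent diffusers exposes set_adapters for per-adapter scaling,
# standing in for the lora_strength argument of the deleted patcher.
pipe.set_adapters(["causvid_lora"], adapter_weights=[1.0])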