from diffusers_helper.hf_login import login import json import os import time import argparse import traceback import einops import numpy as np import torch import spaces os.environ['HF_HOME'] = os.path.abspath(os.path.realpath(os.path.join(os.path.dirname(__file__), './hf_download'))) import gradio as gr from PIL import Image from PIL.PngImagePlugin import PngInfo from diffusers import AutoencoderKLHunyuanVideo from transformers import LlamaModel, CLIPTextModel, LlamaTokenizerFast, CLIPTokenizer from diffusers_helper.hunyuan import encode_prompt_conds, vae_decode, vae_encode, vae_decode_fake from diffusers_helper.utils import save_bcthw_as_mp4, crop_or_pad_yield_mask, soft_append_bcthw, resize_and_center_crop, generate_timestamp from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan from diffusers_helper.memory import cpu, gpu, get_cuda_free_memory_gb, move_model_to_device_with_memory_preservation, offload_model_from_device_for_memory_preservation, fake_diffusers_current_device, DynamicSwapInstaller, unload_complete_models, load_model_as_complete from diffusers_helper.thread_utils import AsyncStream from diffusers_helper.gradio.progress_bar import make_progress_bar_html from transformers import SiglipImageProcessor, SiglipVisionModel from diffusers_helper.clip_vision import hf_clip_vision_encode from diffusers_helper.bucket_tools import find_nearest_bucket from diffusers_helper import lora_utils from diffusers_helper.lora_utils import load_lora, unload_all_loras # Import from modules from modules.video_queue import VideoJobQueue, JobStatus from modules.prompt_handler import parse_timestamped_prompt from modules.interface import create_interface, format_queue_status from modules.settings import Settings # ADDED: Debug function to verify LoRA state def verify_lora_state(transformer, label=""): """Debug function to verify the state of LoRAs in a transformer model""" if transformer is None: print(f"[{label}] Transformer is None, cannot verify LoRA state") return has_loras = False if hasattr(transformer, 'peft_config'): adapter_names = list(transformer.peft_config.keys()) if transformer.peft_config else [] if adapter_names: has_loras = True print(f"[{label}] Transformer has LoRAs: {', '.join(adapter_names)}") else: print(f"[{label}] Transformer has no LoRAs in peft_config") else: print(f"[{label}] Transformer has no peft_config attribute") # Check for any LoRA modules for name, module in transformer.named_modules(): if hasattr(module, 'lora_A') and module.lora_A: has_loras = True # print(f"[{label}] Found lora_A in module {name}") if hasattr(module, 'lora_B') and module.lora_B: has_loras = True # print(f"[{label}] Found lora_B in module {name}") if not has_loras: print(f"[{label}] No LoRA components found in transformer") parser = argparse.ArgumentParser() parser.add_argument('--share', action='store_true') parser.add_argument("--server", type=str, default='0.0.0.0') parser.add_argument("--port", type=int, required=False) parser.add_argument("--inbrowser", action='store_true') parser.add_argument("--lora", type=str, default=None, help="Lora path (comma separated for multiple)") args = parser.parse_args() print(args) free_mem_gb = get_cuda_free_memory_gb(gpu) high_vram = free_mem_gb > 60 print(f'Free VRAM {free_mem_gb} GB') print(f'High-VRAM Mode: {high_vram}') # Load models text_encoder = LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder', torch_dtype=torch.float16).cpu() text_encoder_2 = CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder_2', torch_dtype=torch.float16).cpu() tokenizer = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer') tokenizer_2 = CLIPTokenizer.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer_2') vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='vae', torch_dtype=torch.float16).cpu() feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor') image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=torch.float16).cpu() # Initialize transformer placeholders transformer_original = None transformer_f1 = None current_transformer = None # Will hold the currently active model # Load models based on VRAM availability later # Configure models vae.eval() text_encoder.eval() text_encoder_2.eval() image_encoder.eval() if not high_vram: vae.enable_slicing() vae.enable_tiling() vae.to(dtype=torch.float16) image_encoder.to(dtype=torch.float16) text_encoder.to(dtype=torch.float16) text_encoder_2.to(dtype=torch.float16) vae.requires_grad_(False) text_encoder.requires_grad_(False) text_encoder_2.requires_grad_(False) image_encoder.requires_grad_(False) # Create lora directory if it doesn't exist lora_dir = os.path.join(os.path.dirname(__file__), 'loras') os.makedirs(lora_dir, exist_ok=True) # Initialize LoRA support - moved scanning after settings load lora_names = [] lora_values = [] # This seems unused for population, might be related to weights later script_dir = os.path.dirname(os.path.abspath(__file__)) # Define default LoRA folder path relative to the script directory (used if setting is missing) default_lora_folder = os.path.join(script_dir, "loras") os.makedirs(default_lora_folder, exist_ok=True) # Ensure default exists if not high_vram: # DynamicSwapInstaller is same as huggingface's enable_sequential_offload but 3x faster DynamicSwapInstaller.install_model(text_encoder, device=gpu) else: text_encoder.to(gpu) text_encoder_2.to(gpu) image_encoder.to(gpu) vae.to(gpu) stream = AsyncStream() outputs_folder = './outputs/' os.makedirs(outputs_folder, exist_ok=True) # Initialize settings settings = Settings() # --- Populate LoRA names AFTER settings are loaded --- lora_folder_from_settings = settings.get("lora_dir", default_lora_folder) # Use setting, fallback to default print(f"Scanning for LoRAs in: {lora_folder_from_settings}") if os.path.isdir(lora_folder_from_settings): try: lora_files = [f for f in os.listdir(lora_folder_from_settings) if f.endswith('.safetensors') or f.endswith('.pt')] for lora_file in lora_files: lora_names.append(lora_file.split('.')[0]) # Get name without extension print(f"Found LoRAs: {lora_names}") except Exception as e: print(f"Error scanning LoRA directory '{lora_folder_from_settings}': {e}") else: print(f"LoRA directory not found: {lora_folder_from_settings}") # --- End LoRA population --- # Create job queue job_queue = VideoJobQueue() def move_lora_adapters_to_device(model, target_device): """ Move all LoRA adapters in a model to the specified device. This handles the PEFT implementation of LoRA. """ print(f"Moving all LoRA adapters to {target_device}") # First, find all modules with LoRA adapters lora_modules = [] for name, module in model.named_modules(): if hasattr(module, 'active_adapter') and hasattr(module, 'lora_A') and hasattr(module, 'lora_B'): lora_modules.append((name, module)) # Now move all LoRA components to the target device for name, module in lora_modules: # Get the active adapter name active_adapter = module.active_adapter # Move the LoRA layers to the target device if active_adapter is not None: if isinstance(module.lora_A, torch.nn.ModuleDict): # Handle ModuleDict case (PEFT implementation) for adapter_name in list(module.lora_A.keys()): # Move lora_A if adapter_name in module.lora_A: module.lora_A[adapter_name] = module.lora_A[adapter_name].to(target_device) # Move lora_B if adapter_name in module.lora_B: module.lora_B[adapter_name] = module.lora_B[adapter_name].to(target_device) # Move scaling if hasattr(module, 'scaling') and isinstance(module.scaling, dict) and adapter_name in module.scaling: if isinstance(module.scaling[adapter_name], torch.Tensor): module.scaling[adapter_name] = module.scaling[adapter_name].to(target_device) else: # Handle direct attribute case if hasattr(module, 'lora_A') and module.lora_A is not None: module.lora_A = module.lora_A.to(target_device) if hasattr(module, 'lora_B') and module.lora_B is not None: module.lora_B = module.lora_B.to(target_device) if hasattr(module, 'scaling') and module.scaling is not None: if isinstance(module.scaling, torch.Tensor): module.scaling = module.scaling.to(target_device) print(f"Moved all LoRA adapters to {target_device}") return model # Function to load a LoRA file def load_lora_file(lora_file): if not lora_file: return None, "No file selected" try: # Get the filename from the path _, lora_name = os.path.split(lora_file) # Copy the file to the lora directory lora_dest = os.path.join(lora_dir, lora_name) import shutil shutil.copy(lora_file, lora_dest) # Load the LoRA - NOTE: This needs adjustment for multiple transformers global current_transformer, lora_names if current_transformer is None: return None, "Error: No model loaded to apply LoRA to. Generate something first." # ADDED: Unload any existing LoRAs first current_transformer = lora_utils.unload_all_loras(current_transformer) current_transformer = lora_utils.load_lora(current_transformer, lora_dir, lora_name) # Add to lora_names if not already there lora_base_name = lora_name.split('.')[0] if lora_base_name not in lora_names: lora_names.append(lora_base_name) # Get the current device of the transformer device = next(current_transformer.parameters()).device # Move all LoRA adapters to the same device as the base model move_lora_adapters_to_device(current_transformer, device) print(f"Loaded LoRA: {lora_name} to {type(current_transformer).__name__}") # ADDED: Verify LoRA state after loading verify_lora_state(current_transformer, "After loading LoRA file") return gr.update(choices=lora_names), f"Successfully loaded LoRA: {lora_name}" except Exception as e: print(f"Error loading LoRA: {e}") return None, f"Error loading LoRA: {e}" @torch.no_grad() def worker( model_type, input_image, prompt_text, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf, save_metadata, blend_sections, latent_type, selected_loras, clean_up_videos, lora_values=None, job_stream=None, output_dir=None, metadata_dir=None, resolutionW=640, # Add resolution parameter with default value resolutionH=640, lora_loaded_names=[] ): global transformer_original, transformer_f1, current_transformer, high_vram # ADDED: Ensure any existing LoRAs are unloaded from the current transformer if current_transformer is not None: print("Unloading any existing LoRAs before starting new job") current_transformer = lora_utils.unload_all_loras(current_transformer) import gc gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() # ADDED: Verify LoRA state at worker start verify_lora_state(current_transformer, "Worker start") stream_to_use = job_stream if job_stream is not None else stream total_latent_sections = (total_second_length * 30) / (latent_window_size * 4) total_latent_sections = int(max(round(total_latent_sections), 1)) # Parse the timestamped prompt with boundary snapping and reversing # prompt_text should now be the original string from the job queue prompt_sections = parse_timestamped_prompt(prompt_text, total_second_length, latent_window_size, model_type) job_id = generate_timestamp() stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...')))) try: if not high_vram: # Unload everything *except* the potentially active transformer unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae) if current_transformer is not None: offload_model_from_device_for_memory_preservation(current_transformer, target_device=gpu, preserved_memory_gb=8) # --- Model Loading / Switching --- print(f"Worker starting for model type: {model_type}") target_transformer_model = None other_transformer_model = None if model_type == "Original": if transformer_original is None: print("Loading Original Transformer...") transformer_original = HunyuanVideoTransformer3DModelPacked.from_pretrained('lllyasviel/FramePackI2V_HY', torch_dtype=torch.bfloat16).cpu() transformer_original.eval() transformer_original.to(dtype=torch.bfloat16) transformer_original.requires_grad_(False) if not high_vram: DynamicSwapInstaller.install_model(transformer_original, device=gpu) print("Original Transformer Loaded.") target_transformer_model = transformer_original other_transformer_model = transformer_f1 elif model_type == "F1": if transformer_f1 is None: print("Loading F1 Transformer...") transformer_f1 = HunyuanVideoTransformer3DModelPacked.from_pretrained('lllyasviel/FramePack_F1_I2V_HY_20250503', torch_dtype=torch.bfloat16).cpu() transformer_f1.eval() transformer_f1.to(dtype=torch.bfloat16) transformer_f1.requires_grad_(False) if not high_vram: DynamicSwapInstaller.install_model(transformer_f1, device=gpu) print("F1 Transformer Loaded.") target_transformer_model = transformer_f1 other_transformer_model = transformer_original else: raise ValueError(f"Unknown model_type: {model_type}") # Unload the *other* model if it exists and we are in low VRAM mode if not high_vram and other_transformer_model is not None: print(f"Offloading inactive transformer: {type(other_transformer_model).__name__}") offload_model_from_device_for_memory_preservation(other_transformer_model, target_device=gpu, preserved_memory_gb=8) # Consider fully unloading if memory pressure is extreme: # unload_complete_models(other_transformer_model) # if model_type == "Original": transformer_f1 = None # else: transformer_original = None current_transformer = target_transformer_model # Set the globally accessible current model # ADDED: Ensure the target model has no LoRAs loaded print(f"Ensuring {model_type} transformer has no LoRAs loaded") current_transformer = lora_utils.unload_all_loras(current_transformer) verify_lora_state(current_transformer, "After model selection") # Ensure the target model is on the correct device if in high VRAM mode if high_vram and current_transformer.device != gpu: print(f"Moving {model_type} transformer to GPU (High VRAM mode)...") current_transformer.to(gpu) # Pre-encode all prompts stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Text encoding all prompts...')))) if not high_vram: fake_diffusers_current_device(text_encoder, gpu) load_model_as_complete(text_encoder_2, target_device=gpu) # PROMPT BLENDING: Pre-encode all prompts and store in a list in order unique_prompts = [] for section in prompt_sections: if section.prompt not in unique_prompts: unique_prompts.append(section.prompt) encoded_prompts = {} for prompt in unique_prompts: llama_vec, clip_l_pooler = encode_prompt_conds( prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2 ) llama_vec, llama_attention_mask = crop_or_pad_yield_mask(llama_vec, length=512) encoded_prompts[prompt] = (llama_vec, llama_attention_mask, clip_l_pooler) # PROMPT BLENDING: Build a list of (start_section_idx, prompt) for each prompt prompt_change_indices = [] last_prompt = None for idx, section in enumerate(prompt_sections): if section.prompt != last_prompt: prompt_change_indices.append((idx, section.prompt)) last_prompt = section.prompt # Encode negative prompt if cfg == 1: llama_vec_n, llama_attention_mask_n, clip_l_pooler_n = ( torch.zeros_like(encoded_prompts[prompt_sections[0].prompt][0]), torch.zeros_like(encoded_prompts[prompt_sections[0].prompt][1]), torch.zeros_like(encoded_prompts[prompt_sections[0].prompt][2]) ) else: llama_vec_n, clip_l_pooler_n = encode_prompt_conds( n_prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2 ) llama_vec_n, llama_attention_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512) # Processing input image stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Image processing ...')))) H, W, C = input_image.shape height, width = find_nearest_bucket(H, W, resolution=resolutionW) input_image_np = resize_and_center_crop(input_image, target_width=width, target_height=height) if save_metadata: metadata = PngInfo() # prompt_text should be a string here now metadata.add_text("prompt", prompt_text) metadata.add_text("seed", str(seed)) Image.fromarray(input_image_np).save(os.path.join(metadata_dir, f'{job_id}.png'), pnginfo=metadata) metadata_dict = { "prompt": prompt_text, # Use the original string "seed": seed, "total_second_length": total_second_length, "steps": steps, "cfg": cfg, "gs": gs, "rs": rs, "latent_type" : latent_type, "blend_sections": blend_sections, "latent_window_size": latent_window_size, "mp4_crf": mp4_crf, "timestamp": time.time(), "resolutionW": resolutionW, # Add resolution to metadata "resolutionH": resolutionH, "model_type": model_type # Add model type to metadata } # Add LoRA information to metadata if LoRAs are used def ensure_list(x): if isinstance(x, list): return x elif x is None: return [] else: return [x] selected_loras = ensure_list(selected_loras) lora_values = ensure_list(lora_values) if selected_loras and len(selected_loras) > 0: lora_data = {} for lora_name in selected_loras: try: idx = lora_loaded_names.index(lora_name) weight = lora_values[idx] if lora_values and idx < len(lora_values) else 1.0 if isinstance(weight, list): weight_value = weight[0] if weight and len(weight) > 0 else 1.0 else: weight_value = weight lora_data[lora_name] = float(weight_value) except ValueError: lora_data[lora_name] = 1.0 metadata_dict["loras"] = lora_data with open(os.path.join(metadata_dir, f'{job_id}.json'), 'w') as f: json.dump(metadata_dict, f, indent=2) else: Image.fromarray(input_image_np).save(os.path.join(metadata_dir, f'{job_id}.png')) input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1 input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None] # VAE encoding stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding ...')))) if not high_vram: load_model_as_complete(vae, target_device=gpu) start_latent = vae_encode(input_image_pt, vae) # CLIP Vision stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...')))) if not high_vram: load_model_as_complete(image_encoder, target_device=gpu) image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder) image_encoder_last_hidden_state = image_encoder_output.last_hidden_state # Dtype for prompt_key in encoded_prompts: llama_vec, llama_attention_mask, clip_l_pooler = encoded_prompts[prompt_key] llama_vec = llama_vec.to(current_transformer.dtype) clip_l_pooler = clip_l_pooler.to(current_transformer.dtype) encoded_prompts[prompt_key] = (llama_vec, llama_attention_mask, clip_l_pooler) llama_vec_n = llama_vec_n.to(current_transformer.dtype) clip_l_pooler_n = clip_l_pooler_n.to(current_transformer.dtype) image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(current_transformer.dtype) # Sampling stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Start sampling ...')))) rnd = torch.Generator("cpu").manual_seed(seed) num_frames = latent_window_size * 4 - 3 if model_type == "Original": history_latents = torch.zeros(size=(1, 16, 1 + 2 + 16, height // 8, width // 8), dtype=torch.float32).cpu() else: # F1 model # F1モードでは初期フレームを用意 history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu() # 開始フレームをhistory_latentsに追加 history_latents = torch.cat([history_latents, start_latent.to(history_latents)], dim=2) total_generated_latent_frames = 1 # 最初のフレームを含むので1から開始 history_pixels = None if model_type == "Original": total_generated_latent_frames = 0 # Original model uses reversed latent paddings latent_paddings = reversed(range(total_latent_sections)) if total_latent_sections > 4: latent_paddings = [3] + [2] * (total_latent_sections - 3) + [1, 0] else: # F1 model # F1 model doesn't use latent paddings in the same way # We'll use a fixed approach with just 0 for last section and 1 for others latent_paddings = [1] * (total_latent_sections - 1) + [0] # PROMPT BLENDING: Track section index section_idx = 0 # ADDED: Completely unload all loras from the current transformer current_transformer = lora_utils.unload_all_loras(current_transformer) verify_lora_state(current_transformer, "Before loading LoRAs") # --- LoRA loading and scaling --- if selected_loras: for lora_name in selected_loras: idx = lora_loaded_names.index(lora_name) lora_file = None for ext in [".safetensors", ".pt"]: # Find any file that starts with the lora_name and ends with the extension matching_files = [f for f in os.listdir(lora_folder_from_settings) if f.startswith(lora_name) and f.endswith(ext)] if matching_files: lora_file = matching_files[0] # Use the first matching file break if lora_file: print(f"Loading LoRA {lora_file} to {model_type} model") current_transformer = lora_utils.load_lora(current_transformer, lora_folder_from_settings, lora_file) # Set LoRA strength if provided if lora_values and idx < len(lora_values): lora_strength = float(lora_values[idx]) print(f"Setting LoRA {lora_name} strength to {lora_strength}") # Set scaling for this LoRA by iterating through modules for name, module in current_transformer.named_modules(): if hasattr(module, 'scaling'): if isinstance(module.scaling, dict): # Handle ModuleDict case (PEFT implementation) if lora_name in module.scaling: if isinstance(module.scaling[lora_name], torch.Tensor): module.scaling[lora_name] = torch.tensor( lora_strength, device=module.scaling[lora_name].device ) else: module.scaling[lora_name] = lora_strength else: # Handle direct attribute case for scaling if needed if isinstance(module.scaling, torch.Tensor): module.scaling = torch.tensor( lora_strength, device=module.scaling.device ) else: module.scaling = lora_strength else: print(f"LoRA file for {lora_name} not found!") # ADDED: Verify LoRA state after loading verify_lora_state(current_transformer, "After loading LoRAs") # --- Callback for progress --- def callback(d): preview = d['denoised'] preview = vae_decode_fake(preview) preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8) preview = einops.rearrange(preview, 'b c t h w -> (b h) (t w) c') if stream_to_use.input_queue.top() == 'end': stream_to_use.output_queue.push(('end', None)) raise KeyboardInterrupt('User ends the task.') current_step = d['i'] + 1 percentage = int(100.0 * current_step / steps) current_pos = (total_generated_latent_frames * 4 - 3) / 30 original_pos = total_second_length - current_pos if current_pos < 0: current_pos = 0 if original_pos < 0: original_pos = 0 hint = f'Sampling {current_step}/{steps}' if model_type == "Original": desc = f'Total generated frames: {int(max(0, total_generated_latent_frames * 4 - 3))}, ' \ f'Video length: {max(0, (total_generated_latent_frames * 4 - 3) / 30):.2f} seconds (FPS-30). ' \ f'Current position: {current_pos:.2f}s (original: {original_pos:.2f}s). ' \ f'using prompt: {current_prompt[:256]}...' else: # F1 model desc = f'Total generated frames: {int(max(0, total_generated_latent_frames * 4 - 3))}, ' \ f'Video length: {max(0, (total_generated_latent_frames * 4 - 3) / 30):.2f} seconds (FPS-30). ' \ f'Current position: {current_pos:.2f}s. ' \ f'using prompt: {current_prompt[:256]}...' progress_data = { 'preview': preview, 'desc': desc, 'html': make_progress_bar_html(percentage, hint) } if job_stream is not None: job = job_queue.get_job(job_id) if job: job.progress_data = progress_data stream_to_use.output_queue.push(('progress', (preview, desc, make_progress_bar_html(percentage, hint)))) # --- Main generation loop --- for latent_padding in latent_paddings: is_last_section = latent_padding == 0 latent_padding_size = latent_padding * latent_window_size if stream_to_use.input_queue.top() == 'end': stream_to_use.output_queue.push(('end', None)) return current_time_position = (total_generated_latent_frames * 4 - 3) / 30 # in seconds if current_time_position < 0: current_time_position = 0.01 # Find the appropriate prompt for this section current_prompt = prompt_sections[0].prompt # Default to first prompt for section in prompt_sections: if section.start_time <= current_time_position and (section.end_time is None or current_time_position < section.end_time): current_prompt = section.prompt break # PROMPT BLENDING: Find if we're in a blend window blend_alpha = None prev_prompt = current_prompt next_prompt = current_prompt # Only try to blend if we have prompt change indices and multiple sections if prompt_change_indices and len(prompt_sections) > 1: for i, (change_idx, prompt) in enumerate(prompt_change_indices): if section_idx < change_idx: prev_prompt = prompt_change_indices[i - 1][1] if i > 0 else prompt next_prompt = prompt blend_start = change_idx blend_end = change_idx + blend_sections if section_idx >= change_idx and section_idx < blend_end: blend_alpha = (section_idx - change_idx + 1) / blend_sections break elif section_idx == change_idx: # At the exact change, start blending if i > 0: prev_prompt = prompt_change_indices[i - 1][1] next_prompt = prompt blend_alpha = 1.0 / blend_sections else: prev_prompt = prompt next_prompt = prompt blend_alpha = None break else: # After last change, no blending prev_prompt = current_prompt next_prompt = current_prompt blend_alpha = None # Get the encoded prompt for this section if blend_alpha is not None and prev_prompt != next_prompt: # Blend embeddings prev_llama_vec, prev_llama_attention_mask, prev_clip_l_pooler = encoded_prompts[prev_prompt] next_llama_vec, next_llama_attention_mask, next_clip_l_pooler = encoded_prompts[next_prompt] llama_vec = (1 - blend_alpha) * prev_llama_vec + blend_alpha * next_llama_vec llama_attention_mask = prev_llama_attention_mask # usually same clip_l_pooler = (1 - blend_alpha) * prev_clip_l_pooler + blend_alpha * next_clip_l_pooler print(f"Blending prompts: '{prev_prompt[:30]}...' -> '{next_prompt[:30]}...', alpha={blend_alpha:.2f}") else: llama_vec, llama_attention_mask, clip_l_pooler = encoded_prompts[current_prompt] original_time_position = total_second_length - current_time_position if original_time_position < 0: original_time_position = 0 print(f'latent_padding_size = {latent_padding_size}, is_last_section = {is_last_section}, ' f'time position: {current_time_position:.2f}s (original: {original_time_position:.2f}s), ' f'using prompt: {current_prompt[:60]}...') if model_type == "Original": # Original model uses the standard indices approach indices = torch.arange(0, sum([1, latent_padding_size, latent_window_size, 1, 2, 16])).unsqueeze(0) clean_latent_indices_pre, blank_indices, latent_indices, clean_latent_indices_post, clean_latent_2x_indices, clean_latent_4x_indices = indices.split([1, latent_padding_size, latent_window_size, 1, 2, 16], dim=1) clean_latent_indices = torch.cat([clean_latent_indices_pre, clean_latent_indices_post], dim=1) else: # F1 model # F1 model uses a different indices approach # latent_window_sizeが4.5の場合は特別に5を使用 effective_window_size = 5 if latent_window_size == 4.5 else int(latent_window_size) indices = torch.arange(0, sum([1, 16, 2, 1, latent_window_size])).unsqueeze(0) clean_latent_indices_start, clean_latent_4x_indices, clean_latent_2x_indices, clean_latent_1x_indices, latent_indices = indices.split([1, 16, 2, 1, latent_window_size], dim=1) clean_latent_indices = torch.cat([clean_latent_indices_start, clean_latent_1x_indices], dim=1) print(f"F1 model indices: clean_latent_indices shape={clean_latent_indices.shape}, latent_indices shape={latent_indices.shape}") if model_type == "Original": clean_latents_pre = start_latent.to(history_latents) clean_latents_post, clean_latents_2x, clean_latents_4x = history_latents[:, :, :1 + 2 + 16, :, :].split([1, 2, 16], dim=2) clean_latents = torch.cat([clean_latents_pre, clean_latents_post], dim=2) else: # F1 model # For F1, we take the last frames for clean latents clean_latents_4x, clean_latents_2x, clean_latents_1x = history_latents[:, :, -sum([16, 2, 1]):, :, :].split([16, 2, 1], dim=2) # For F1, we prepend the start latent to clean_latents_1x clean_latents = torch.cat([start_latent.to(history_latents), clean_latents_1x], dim=2) # Print debug info for F1 model print(f"F1 model section {section_idx+1}/{total_latent_sections}, latent_padding={latent_padding}") if not high_vram: # Unload VAE etc. before loading transformer unload_complete_models(vae, text_encoder, text_encoder_2, image_encoder) move_model_to_device_with_memory_preservation(current_transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation) if selected_loras: move_lora_adapters_to_device(current_transformer, gpu) if use_teacache: current_transformer.initialize_teacache(enable_teacache=True, num_steps=steps) else: current_transformer.initialize_teacache(enable_teacache=False) generated_latents = sample_hunyuan( transformer=current_transformer, sampler='unipc', width=width, height=height, frames=num_frames, real_guidance_scale=cfg, distilled_guidance_scale=gs, guidance_rescale=rs, num_inference_steps=steps, generator=rnd, prompt_embeds=llama_vec, prompt_embeds_mask=llama_attention_mask, prompt_poolers=clip_l_pooler, negative_prompt_embeds=llama_vec_n, negative_prompt_embeds_mask=llama_attention_mask_n, negative_prompt_poolers=clip_l_pooler_n, device=gpu, dtype=torch.bfloat16, image_embeddings=image_encoder_last_hidden_state, latent_indices=latent_indices, clean_latents=clean_latents, clean_latent_indices=clean_latent_indices, clean_latents_2x=clean_latents_2x, clean_latent_2x_indices=clean_latent_2x_indices, clean_latents_4x=clean_latents_4x, clean_latent_4x_indices=clean_latent_4x_indices, callback=callback, ) total_generated_latent_frames += int(generated_latents.shape[2]) if model_type == "Original": history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2) else: # F1 model # For F1, we append new frames to the end history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2) if not high_vram: if selected_loras: move_lora_adapters_to_device(current_transformer, cpu) offload_model_from_device_for_memory_preservation(current_transformer, target_device=gpu, preserved_memory_gb=8) load_model_as_complete(vae, target_device=gpu) if model_type == "Original": real_history_latents = history_latents[:, :, :total_generated_latent_frames, :, :] else: # F1 model # For F1, we take frames from the end real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :] if history_pixels is None: history_pixels = vae_decode(real_history_latents, vae).cpu() else: section_latent_frames = (latent_window_size * 2 + 1) if is_last_section else (latent_window_size * 2) overlapped_frames = latent_window_size * 4 - 3 if model_type == "Original": current_pixels = vae_decode(real_history_latents[:, :, :section_latent_frames], vae).cpu() history_pixels = soft_append_bcthw(current_pixels, history_pixels, overlapped_frames) else: # F1 model # For F1, we take frames from the end print(f"F1 model section {section_idx+1}/{total_latent_sections}, section_latent_frames={section_latent_frames}") print(f"F1 model real_history_latents shape: {real_history_latents.shape}, taking last {section_latent_frames} frames") # Get the frames from the end of real_history_latents current_pixels = vae_decode(real_history_latents[:, :, -section_latent_frames:], vae).cpu() print(f"F1 model current_pixels shape: {current_pixels.shape}, history_pixels shape: {history_pixels.shape if history_pixels is not None else 'None'}") # For F1 model, history_pixels is first, current_pixels is second history_pixels = soft_append_bcthw(history_pixels, current_pixels, overlapped_frames) print(f"F1 model after append, history_pixels shape: {history_pixels.shape}") if not high_vram: unload_complete_models() output_filename = os.path.join(output_dir, f'{job_id}_{total_generated_latent_frames}.mp4') save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=mp4_crf) print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}') stream_to_use.output_queue.push(('file', output_filename)) if is_last_section: break section_idx += 1 # PROMPT BLENDING: increment section index # ADDED: Unload all LoRAs after generation completed if selected_loras: print("Unloading all LoRAs after generation completed") current_transformer = lora_utils.unload_all_loras(current_transformer) verify_lora_state(current_transformer, "After generation completed") import gc gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() except: traceback.print_exc() # ADDED: Unload all LoRAs after error if current_transformer is not None and selected_loras: print("Unloading all LoRAs after error") current_transformer = lora_utils.unload_all_loras(current_transformer) verify_lora_state(current_transformer, "After error") import gc gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() stream_to_use.output_queue.push(('error', f"Error during generation: {traceback.format_exc()}")) if not high_vram: # Ensure all models including the potentially active transformer are unloaded on error unload_complete_models( text_encoder, text_encoder_2, image_encoder, vae, current_transformer ) if clean_up_videos: try: video_files = [ f for f in os.listdir(output_dir) if f.startswith(f"{job_id}_") and f.endswith(".mp4") ] print(f"Video files found for cleanup: {video_files}") if video_files: def get_frame_count(filename): try: # Handles filenames like jobid_123.mp4 return int(filename.replace(f"{job_id}_", "").replace(".mp4", "")) except Exception: return -1 video_files_sorted = sorted(video_files, key=get_frame_count) print(f"Sorted video files: {video_files_sorted}") final_video = video_files_sorted[-1] for vf in video_files_sorted[:-1]: full_path = os.path.join(output_dir, vf) try: os.remove(full_path) print(f"Deleted intermediate video: {full_path}") except Exception as e: print(f"Failed to delete {full_path}: {e}") except Exception as e: print(f"Error during video cleanup: {e}") # ADDED: Final verification of LoRA state verify_lora_state(current_transformer, "Worker end") stream_to_use.output_queue.push(('end', None)) return # Set the worker function for the job queue job_queue.set_worker_function(worker) def get_duration( model_type, input_image, prompt_text, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf, save_metadata, blend_sections, latent_type, clean_up_videos, selected_loras, resolutionW, resolutionH, lora_loaded_names, *lora_values): return total_second_length * 60 @spaces.GPU(duration=get_duration) def process( model_type, input_image, prompt_text, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache, mp4_crf, save_metadata, blend_sections, latent_type, clean_up_videos, selected_loras, resolutionW, resolutionH, lora_loaded_names, *lora_values ): # Create a blank black image if no # Create a default image based on the selected latent_type if input_image is None: default_height, default_width = resolutionH, resolutionW if latent_type == "White": # Create a white image input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255 print("No input image provided. Using a blank white image.") elif latent_type == "Noise": # Create a noise image input_image = np.random.randint(0, 256, (default_height, default_width, 3), dtype=np.uint8) print("No input image provided. Using a random noise image.") elif latent_type == "Green Screen": # Create a green screen image with standard chroma key green (0, 177, 64) input_image = np.zeros((default_height, default_width, 3), dtype=np.uint8) input_image[:, :, 1] = 177 # Green channel input_image[:, :, 2] = 64 # Blue channel # Red channel remains 0 print("No input image provided. Using a standard chroma key green screen.") else: # Default to "Black" or any other value # Create a black image input_image = np.zeros((default_height, default_width, 3), dtype=np.uint8) print(f"No input image provided. Using a blank black image (latent_type: {latent_type}).") # Create job parameters job_params = { 'model_type': model_type, 'input_image': input_image.copy(), # Make a copy to avoid reference issues 'prompt_text': prompt_text, 'n_prompt': n_prompt, 'seed': seed, 'total_second_length': total_second_length, 'latent_window_size': latent_window_size, 'latent_type': latent_type, 'steps': steps, 'cfg': cfg, 'gs': gs, 'rs': rs, 'blend_sections': blend_sections, 'gpu_memory_preservation': gpu_memory_preservation, 'use_teacache': use_teacache, 'mp4_crf': mp4_crf, 'save_metadata': save_metadata, 'selected_loras': selected_loras, 'clean_up_videos': clean_up_videos, 'output_dir': settings.get("output_dir"), 'metadata_dir': settings.get("metadata_dir"), 'resolutionW': resolutionW, # Add resolution parameter 'resolutionH': resolutionH, 'lora_loaded_names': lora_loaded_names } # Add LoRA values if provided - extract them from the tuple if lora_values: # Convert tuple to list lora_values_list = list(lora_values) job_params['lora_values'] = lora_values_list # Add job to queue job_id = job_queue.add_job(job_params) print(f"Added job {job_id} to queue") queue_status = update_queue_status() # Return immediately after adding to queue return None, job_id, None, '', f'Job added to queue. Job ID: {job_id}', gr.update(interactive=True), gr.update(interactive=True) def end_process(): """Cancel the current running job and update the queue status""" print("Cancelling current job") with job_queue.lock: if job_queue.current_job: job_id = job_queue.current_job.id print(f"Cancelling job {job_id}") # Send the end signal to the job's stream if job_queue.current_job.stream: job_queue.current_job.stream.input_queue.push('end') # Mark the job as cancelled job_queue.current_job.status = JobStatus.CANCELLED job_queue.current_job.completed_at = time.time() # Set completion time # Force an update to the queue status return update_queue_status() def update_queue_status(): """Update queue status and refresh job positions""" jobs = job_queue.get_all_jobs() for job in jobs: if job.status == JobStatus.PENDING: job.queue_position = job_queue.get_queue_position(job.id) # Make sure to update current running job info if job_queue.current_job: # Make sure the running job is showing status = RUNNING job_queue.current_job.status = JobStatus.RUNNING return format_queue_status(jobs) def monitor_job(job_id): """ Monitor a specific job and update the UI with the latest video segment as soon as it's available. """ if not job_id: yield None, None, None, '', 'No job ID provided', gr.update(interactive=True), gr.update(interactive=True) return last_video = None # Track the last video file shown while True: job = job_queue.get_job(job_id) if not job: yield None, job_id, None, '', 'Job not found', gr.update(interactive=True), gr.update(interactive=True) return # If a new video file is available, yield it immediately if job.result and job.result != last_video: last_video = job.result # You can also update preview/progress here if desired yield last_video, job_id, gr.update(visible=True), '', '', gr.update(interactive=True), gr.update(interactive=True) # Handle job status and progress if job.status == JobStatus.PENDING: position = job_queue.get_queue_position(job_id) yield last_video, job_id, gr.update(visible=True), '', f'Waiting in queue. Position: {position}', gr.update(interactive=True), gr.update(interactive=True) elif job.status == JobStatus.RUNNING: if job.progress_data and 'preview' in job.progress_data: preview = job.progress_data.get('preview') desc = job.progress_data.get('desc', '') html = job.progress_data.get('html', '') yield last_video, job_id, gr.update(visible=True, value=preview), desc, html, gr.update(interactive=True), gr.update(interactive=True) else: yield last_video, job_id, gr.update(visible=True), '', 'Processing...', gr.update(interactive=True), gr.update(interactive=True) elif job.status == JobStatus.COMPLETED: # Show the final video yield last_video, job_id, gr.update(visible=True), '', '', gr.update(interactive=True), gr.update(interactive=True) break elif job.status == JobStatus.FAILED: yield last_video, job_id, gr.update(visible=True), '', f'Error: {job.error}', gr.update(interactive=True), gr.update(interactive=True) break elif job.status == JobStatus.CANCELLED: yield last_video, job_id, gr.update(visible=True), '', 'Job cancelled', gr.update(interactive=True), gr.update(interactive=True) break # Wait a bit before checking again time.sleep(0.5) # Set Gradio temporary directory from settings os.environ["GRADIO_TEMP_DIR"] = settings.get("gradio_temp_dir") # Create the interface interface = create_interface( process_fn=process, monitor_fn=monitor_job, end_process_fn=end_process, update_queue_status_fn=update_queue_status, load_lora_file_fn=load_lora_file, job_queue=job_queue, settings=settings, lora_names=lora_names # Explicitly pass the found LoRA names ) # Launch the interface # interface.launch( # server_name=args.server, # server_port=args.port, # share=args.share, # inbrowser=args.inbrowser # ) interface.launch(share=True)