# FramePack-Studio / studio.py
from diffusers_helper.hf_login import login
import json
import os
# Point the Hugging Face cache at a local folder. Set this before importing libraries
# that may resolve HF_HOME when they are first imported (transformers, diffusers, gradio, spaces).
os.environ['HF_HOME'] = os.path.abspath(os.path.realpath(os.path.join(os.path.dirname(__file__), './hf_download')))
import time
import argparse
import traceback
import einops
import numpy as np
import torch
import spaces
import gradio as gr
from PIL import Image
from PIL.PngImagePlugin import PngInfo
from diffusers import AutoencoderKLHunyuanVideo
from transformers import LlamaModel, CLIPTextModel, LlamaTokenizerFast, CLIPTokenizer
from diffusers_helper.hunyuan import encode_prompt_conds, vae_decode, vae_encode, vae_decode_fake
from diffusers_helper.utils import save_bcthw_as_mp4, crop_or_pad_yield_mask, soft_append_bcthw, resize_and_center_crop, generate_timestamp
from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
from diffusers_helper.memory import cpu, gpu, get_cuda_free_memory_gb, move_model_to_device_with_memory_preservation, offload_model_from_device_for_memory_preservation, fake_diffusers_current_device, DynamicSwapInstaller, unload_complete_models, load_model_as_complete
from diffusers_helper.thread_utils import AsyncStream
from diffusers_helper.gradio.progress_bar import make_progress_bar_html
from transformers import SiglipImageProcessor, SiglipVisionModel
from diffusers_helper.clip_vision import hf_clip_vision_encode
from diffusers_helper.bucket_tools import find_nearest_bucket
from diffusers_helper import lora_utils
from diffusers_helper.lora_utils import load_lora, unload_all_loras
# Import from modules
from modules.video_queue import VideoJobQueue, JobStatus
from modules.prompt_handler import parse_timestamped_prompt
from modules.interface import create_interface, format_queue_status
from modules.settings import Settings
# ADDED: Debug function to verify LoRA state
def verify_lora_state(transformer, label=""):
"""Debug function to verify the state of LoRAs in a transformer model"""
if transformer is None:
print(f"[{label}] Transformer is None, cannot verify LoRA state")
return
has_loras = False
if hasattr(transformer, 'peft_config'):
adapter_names = list(transformer.peft_config.keys()) if transformer.peft_config else []
if adapter_names:
has_loras = True
print(f"[{label}] Transformer has LoRAs: {', '.join(adapter_names)}")
else:
print(f"[{label}] Transformer has no LoRAs in peft_config")
else:
print(f"[{label}] Transformer has no peft_config attribute")
# Check for any LoRA modules
for name, module in transformer.named_modules():
if hasattr(module, 'lora_A') and module.lora_A:
has_loras = True
# print(f"[{label}] Found lora_A in module {name}")
if hasattr(module, 'lora_B') and module.lora_B:
has_loras = True
# print(f"[{label}] Found lora_B in module {name}")
if not has_loras:
print(f"[{label}] No LoRA components found in transformer")
parser = argparse.ArgumentParser()
parser.add_argument('--share', action='store_true')
parser.add_argument("--server", type=str, default='0.0.0.0')
parser.add_argument("--port", type=int, required=False)
parser.add_argument("--inbrowser", action='store_true')
parser.add_argument("--lora", type=str, default=None, help="Lora path (comma separated for multiple)")
args = parser.parse_args()
print(args)
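# Heuristic: with more than 60 GB of free VRAM, keep every model resident on the GPU;
# otherwise fall back to DynamicSwapInstaller / offloading further below.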
free_mem_gb = get_cuda_free_memory_gb(gpu)
high_vram = free_mem_gb > 60
print(f'Free VRAM {free_mem_gb} GB')
print(f'High-VRAM Mode: {high_vram}')
# Load models
text_encoder = LlamaModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder', torch_dtype=torch.float16).cpu()
text_encoder_2 = CLIPTextModel.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='text_encoder_2', torch_dtype=torch.float16).cpu()
tokenizer = LlamaTokenizerFast.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer')
tokenizer_2 = CLIPTokenizer.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='tokenizer_2')
vae = AutoencoderKLHunyuanVideo.from_pretrained("hunyuanvideo-community/HunyuanVideo", subfolder='vae', torch_dtype=torch.float16).cpu()
feature_extractor = SiglipImageProcessor.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='feature_extractor')
image_encoder = SiglipVisionModel.from_pretrained("lllyasviel/flux_redux_bfl", subfolder='image_encoder', torch_dtype=torch.float16).cpu()
# Initialize transformer placeholders
transformer_original = None
transformer_f1 = None
current_transformer = None # Will hold the currently active model
# Load models based on VRAM availability later
# Configure models
vae.eval()
text_encoder.eval()
text_encoder_2.eval()
image_encoder.eval()
if not high_vram:
vae.enable_slicing()
vae.enable_tiling()
vae.to(dtype=torch.float16)
image_encoder.to(dtype=torch.float16)
text_encoder.to(dtype=torch.float16)
text_encoder_2.to(dtype=torch.float16)
vae.requires_grad_(False)
text_encoder.requires_grad_(False)
text_encoder_2.requires_grad_(False)
image_encoder.requires_grad_(False)
# Create lora directory if it doesn't exist
lora_dir = os.path.join(os.path.dirname(__file__), 'loras')
os.makedirs(lora_dir, exist_ok=True)
# Initialize LoRA support - moved scanning after settings load
lora_names = []
lora_values = [] # This seems unused for population, might be related to weights later
script_dir = os.path.dirname(os.path.abspath(__file__))
# Define default LoRA folder path relative to the script directory (used if setting is missing)
default_lora_folder = os.path.join(script_dir, "loras")
os.makedirs(default_lora_folder, exist_ok=True) # Ensure default exists
if not high_vram:
    # DynamicSwapInstaller works like Hugging Face's enable_sequential_cpu_offload, but roughly 3x faster
DynamicSwapInstaller.install_model(text_encoder, device=gpu)
else:
text_encoder.to(gpu)
text_encoder_2.to(gpu)
image_encoder.to(gpu)
vae.to(gpu)
stream = AsyncStream()
outputs_folder = './outputs/'
os.makedirs(outputs_folder, exist_ok=True)
# Initialize settings
settings = Settings()
# --- Populate LoRA names AFTER settings are loaded ---
lora_folder_from_settings = settings.get("lora_dir", default_lora_folder) # Use setting, fallback to default
print(f"Scanning for LoRAs in: {lora_folder_from_settings}")
if os.path.isdir(lora_folder_from_settings):
try:
lora_files = [f for f in os.listdir(lora_folder_from_settings)
if f.endswith('.safetensors') or f.endswith('.pt')]
for lora_file in lora_files:
            lora_names.append(os.path.splitext(lora_file)[0])  # Name without extension (robust to dots in the filename)
print(f"Found LoRAs: {lora_names}")
except Exception as e:
print(f"Error scanning LoRA directory '{lora_folder_from_settings}': {e}")
else:
print(f"LoRA directory not found: {lora_folder_from_settings}")
# --- End LoRA population ---
# Create job queue
job_queue = VideoJobQueue()
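# The actual generation worker is attached later via job_queue.set_worker_function(worker).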
def move_lora_adapters_to_device(model, target_device):
"""
Move all LoRA adapters in a model to the specified device.
This handles the PEFT implementation of LoRA.
"""
print(f"Moving all LoRA adapters to {target_device}")
# First, find all modules with LoRA adapters
lora_modules = []
for name, module in model.named_modules():
if hasattr(module, 'active_adapter') and hasattr(module, 'lora_A') and hasattr(module, 'lora_B'):
lora_modules.append((name, module))
# Now move all LoRA components to the target device
for name, module in lora_modules:
# Get the active adapter name
active_adapter = module.active_adapter
# Move the LoRA layers to the target device
if active_adapter is not None:
if isinstance(module.lora_A, torch.nn.ModuleDict):
# Handle ModuleDict case (PEFT implementation)
for adapter_name in list(module.lora_A.keys()):
# Move lora_A
if adapter_name in module.lora_A:
module.lora_A[adapter_name] = module.lora_A[adapter_name].to(target_device)
# Move lora_B
if adapter_name in module.lora_B:
module.lora_B[adapter_name] = module.lora_B[adapter_name].to(target_device)
# Move scaling
if hasattr(module, 'scaling') and isinstance(module.scaling, dict) and adapter_name in module.scaling:
if isinstance(module.scaling[adapter_name], torch.Tensor):
module.scaling[adapter_name] = module.scaling[adapter_name].to(target_device)
else:
# Handle direct attribute case
if hasattr(module, 'lora_A') and module.lora_A is not None:
module.lora_A = module.lora_A.to(target_device)
if hasattr(module, 'lora_B') and module.lora_B is not None:
module.lora_B = module.lora_B.to(target_device)
if hasattr(module, 'scaling') and module.scaling is not None:
if isinstance(module.scaling, torch.Tensor):
module.scaling = module.scaling.to(target_device)
print(f"Moved all LoRA adapters to {target_device}")
return model
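# Typical usage in this file: move_lora_adapters_to_device(current_transformer, gpu) right before
# sampling, and back to cpu afterwards when offloading in low-VRAM mode.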
# Function to load a LoRA file
def load_lora_file(lora_file):
if not lora_file:
return None, "No file selected"
try:
# Get the filename from the path
_, lora_name = os.path.split(lora_file)
# Copy the file to the lora directory
lora_dest = os.path.join(lora_dir, lora_name)
import shutil
shutil.copy(lora_file, lora_dest)
# Load the LoRA - NOTE: This needs adjustment for multiple transformers
global current_transformer, lora_names
if current_transformer is None:
return None, "Error: No model loaded to apply LoRA to. Generate something first."
# ADDED: Unload any existing LoRAs first
current_transformer = lora_utils.unload_all_loras(current_transformer)
current_transformer = lora_utils.load_lora(current_transformer, lora_dir, lora_name)
# Add to lora_names if not already there
        lora_base_name = os.path.splitext(lora_name)[0]
if lora_base_name not in lora_names:
lora_names.append(lora_base_name)
# Get the current device of the transformer
device = next(current_transformer.parameters()).device
# Move all LoRA adapters to the same device as the base model
move_lora_adapters_to_device(current_transformer, device)
print(f"Loaded LoRA: {lora_name} to {type(current_transformer).__name__}")
# ADDED: Verify LoRA state after loading
verify_lora_state(current_transformer, "After loading LoRA file")
return gr.update(choices=lora_names), f"Successfully loaded LoRA: {lora_name}"
except Exception as e:
print(f"Error loading LoRA: {e}")
return None, f"Error loading LoRA: {e}"
@torch.no_grad()
def worker(
model_type,
input_image,
prompt_text,
n_prompt,
seed,
total_second_length,
latent_window_size,
steps,
cfg,
gs,
rs,
gpu_memory_preservation,
use_teacache,
mp4_crf,
save_metadata,
blend_sections,
latent_type,
selected_loras,
clean_up_videos,
lora_values=None,
job_stream=None,
output_dir=None,
metadata_dir=None,
resolutionW=640, # Add resolution parameter with default value
resolutionH=640,
lora_loaded_names=[]
):
global transformer_original, transformer_f1, current_transformer, high_vram
# ADDED: Ensure any existing LoRAs are unloaded from the current transformer
if current_transformer is not None:
print("Unloading any existing LoRAs before starting new job")
current_transformer = lora_utils.unload_all_loras(current_transformer)
import gc
gc.collect()
if torch.cuda.is_available():
torch.cuda.empty_cache()
# ADDED: Verify LoRA state at worker start
verify_lora_state(current_transformer, "Worker start")
stream_to_use = job_stream if job_stream is not None else stream
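    # Each latent window decodes to roughly latent_window_size * 4 pixel frames at 30 fps,
    # so a clip of total_second_length seconds needs about (seconds * 30) / (window * 4) sections.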
total_latent_sections = (total_second_length * 30) / (latent_window_size * 4)
total_latent_sections = int(max(round(total_latent_sections), 1))
# Parse the timestamped prompt with boundary snapping and reversing
# prompt_text should now be the original string from the job queue
prompt_sections = parse_timestamped_prompt(prompt_text, total_second_length, latent_window_size, model_type)
job_id = generate_timestamp()
stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Starting ...'))))
try:
if not high_vram:
# Unload everything *except* the potentially active transformer
unload_complete_models(text_encoder, text_encoder_2, image_encoder, vae)
if current_transformer is not None:
offload_model_from_device_for_memory_preservation(current_transformer, target_device=gpu, preserved_memory_gb=8)
# --- Model Loading / Switching ---
print(f"Worker starting for model type: {model_type}")
target_transformer_model = None
other_transformer_model = None
if model_type == "Original":
if transformer_original is None:
print("Loading Original Transformer...")
transformer_original = HunyuanVideoTransformer3DModelPacked.from_pretrained('lllyasviel/FramePackI2V_HY', torch_dtype=torch.bfloat16).cpu()
transformer_original.eval()
transformer_original.to(dtype=torch.bfloat16)
transformer_original.requires_grad_(False)
if not high_vram:
DynamicSwapInstaller.install_model(transformer_original, device=gpu)
print("Original Transformer Loaded.")
target_transformer_model = transformer_original
other_transformer_model = transformer_f1
elif model_type == "F1":
if transformer_f1 is None:
print("Loading F1 Transformer...")
transformer_f1 = HunyuanVideoTransformer3DModelPacked.from_pretrained('lllyasviel/FramePack_F1_I2V_HY_20250503', torch_dtype=torch.bfloat16).cpu()
transformer_f1.eval()
transformer_f1.to(dtype=torch.bfloat16)
transformer_f1.requires_grad_(False)
if not high_vram:
DynamicSwapInstaller.install_model(transformer_f1, device=gpu)
print("F1 Transformer Loaded.")
target_transformer_model = transformer_f1
other_transformer_model = transformer_original
else:
raise ValueError(f"Unknown model_type: {model_type}")
# Unload the *other* model if it exists and we are in low VRAM mode
if not high_vram and other_transformer_model is not None:
print(f"Offloading inactive transformer: {type(other_transformer_model).__name__}")
offload_model_from_device_for_memory_preservation(other_transformer_model, target_device=gpu, preserved_memory_gb=8)
# Consider fully unloading if memory pressure is extreme:
# unload_complete_models(other_transformer_model)
# if model_type == "Original": transformer_f1 = None
# else: transformer_original = None
current_transformer = target_transformer_model # Set the globally accessible current model
# ADDED: Ensure the target model has no LoRAs loaded
print(f"Ensuring {model_type} transformer has no LoRAs loaded")
current_transformer = lora_utils.unload_all_loras(current_transformer)
verify_lora_state(current_transformer, "After model selection")
# Ensure the target model is on the correct device if in high VRAM mode
if high_vram and current_transformer.device != gpu:
print(f"Moving {model_type} transformer to GPU (High VRAM mode)...")
current_transformer.to(gpu)
# Pre-encode all prompts
stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Text encoding all prompts...'))))
if not high_vram:
fake_diffusers_current_device(text_encoder, gpu)
load_model_as_complete(text_encoder_2, target_device=gpu)
# PROMPT BLENDING: Pre-encode all prompts and store in a list in order
unique_prompts = []
for section in prompt_sections:
if section.prompt not in unique_prompts:
unique_prompts.append(section.prompt)
encoded_prompts = {}
for prompt in unique_prompts:
llama_vec, clip_l_pooler = encode_prompt_conds(
prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2
)
llama_vec, llama_attention_mask = crop_or_pad_yield_mask(llama_vec, length=512)
encoded_prompts[prompt] = (llama_vec, llama_attention_mask, clip_l_pooler)
# PROMPT BLENDING: Build a list of (start_section_idx, prompt) for each prompt
prompt_change_indices = []
last_prompt = None
for idx, section in enumerate(prompt_sections):
if section.prompt != last_prompt:
prompt_change_indices.append((idx, section.prompt))
last_prompt = section.prompt
# Encode negative prompt
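        # When cfg == 1 (no real classifier-free guidance), the negative prompt has no effect,
        # so zero tensors shaped like the positive embeddings stand in for it.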
if cfg == 1:
llama_vec_n, llama_attention_mask_n, clip_l_pooler_n = (
torch.zeros_like(encoded_prompts[prompt_sections[0].prompt][0]),
torch.zeros_like(encoded_prompts[prompt_sections[0].prompt][1]),
torch.zeros_like(encoded_prompts[prompt_sections[0].prompt][2])
)
else:
llama_vec_n, clip_l_pooler_n = encode_prompt_conds(
n_prompt, text_encoder, text_encoder_2, tokenizer, tokenizer_2
)
llama_vec_n, llama_attention_mask_n = crop_or_pad_yield_mask(llama_vec_n, length=512)
# Processing input image
stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Image processing ...'))))
H, W, C = input_image.shape
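        # Snap the input to the nearest supported (height, width) bucket for the requested
        # resolution, then resize and center-crop the image to fit that bucket.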
height, width = find_nearest_bucket(H, W, resolution=resolutionW)
input_image_np = resize_and_center_crop(input_image, target_width=width, target_height=height)
if save_metadata:
metadata = PngInfo()
# prompt_text should be a string here now
metadata.add_text("prompt", prompt_text)
metadata.add_text("seed", str(seed))
Image.fromarray(input_image_np).save(os.path.join(metadata_dir, f'{job_id}.png'), pnginfo=metadata)
metadata_dict = {
"prompt": prompt_text, # Use the original string
"seed": seed,
"total_second_length": total_second_length,
"steps": steps,
"cfg": cfg,
"gs": gs,
"rs": rs,
"latent_type" : latent_type,
"blend_sections": blend_sections,
"latent_window_size": latent_window_size,
"mp4_crf": mp4_crf,
"timestamp": time.time(),
"resolutionW": resolutionW, # Add resolution to metadata
"resolutionH": resolutionH,
"model_type": model_type # Add model type to metadata
}
# Add LoRA information to metadata if LoRAs are used
def ensure_list(x):
if isinstance(x, list):
return x
elif x is None:
return []
else:
return [x]
selected_loras = ensure_list(selected_loras)
lora_values = ensure_list(lora_values)
if selected_loras and len(selected_loras) > 0:
lora_data = {}
for lora_name in selected_loras:
try:
idx = lora_loaded_names.index(lora_name)
weight = lora_values[idx] if lora_values and idx < len(lora_values) else 1.0
if isinstance(weight, list):
weight_value = weight[0] if weight and len(weight) > 0 else 1.0
else:
weight_value = weight
lora_data[lora_name] = float(weight_value)
except ValueError:
lora_data[lora_name] = 1.0
metadata_dict["loras"] = lora_data
with open(os.path.join(metadata_dir, f'{job_id}.json'), 'w') as f:
json.dump(metadata_dict, f, indent=2)
else:
Image.fromarray(input_image_np).save(os.path.join(metadata_dir, f'{job_id}.png'))
input_image_pt = torch.from_numpy(input_image_np).float() / 127.5 - 1
input_image_pt = input_image_pt.permute(2, 0, 1)[None, :, None]
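        # input_image_pt is now in [-1, 1] with layout (B, C, T, H, W) and a single time step,
        # ready for VAE encoding below.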
# VAE encoding
stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'VAE encoding ...'))))
if not high_vram:
load_model_as_complete(vae, target_device=gpu)
start_latent = vae_encode(input_image_pt, vae)
# CLIP Vision
stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'CLIP Vision encoding ...'))))
if not high_vram:
load_model_as_complete(image_encoder, target_device=gpu)
image_encoder_output = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder)
image_encoder_last_hidden_state = image_encoder_output.last_hidden_state
# Dtype
for prompt_key in encoded_prompts:
llama_vec, llama_attention_mask, clip_l_pooler = encoded_prompts[prompt_key]
llama_vec = llama_vec.to(current_transformer.dtype)
clip_l_pooler = clip_l_pooler.to(current_transformer.dtype)
encoded_prompts[prompt_key] = (llama_vec, llama_attention_mask, clip_l_pooler)
llama_vec_n = llama_vec_n.to(current_transformer.dtype)
clip_l_pooler_n = clip_l_pooler_n.to(current_transformer.dtype)
image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(current_transformer.dtype)
# Sampling
stream_to_use.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Start sampling ...'))))
rnd = torch.Generator("cpu").manual_seed(seed)
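        # With 4x temporal VAE compression, a window of latent_window_size latents decodes to
        # (latent_window_size - 1) * 4 + 1 = latent_window_size * 4 - 3 pixel frames.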
num_frames = latent_window_size * 4 - 3
if model_type == "Original":
history_latents = torch.zeros(size=(1, 16, 1 + 2 + 16, height // 8, width // 8), dtype=torch.float32).cpu()
else: # F1 model
            # F1 mode: prepare the initial latent history
            history_latents = torch.zeros(size=(1, 16, 16 + 2 + 1, height // 8, width // 8), dtype=torch.float32).cpu()
            # Append the start frame to the latent history
            history_latents = torch.cat([history_latents, start_latent.to(history_latents)], dim=2)
            total_generated_latent_frames = 1  # Start at 1 because the start frame is already counted
history_pixels = None
if model_type == "Original":
total_generated_latent_frames = 0
# Original model uses reversed latent paddings
latent_paddings = reversed(range(total_latent_sections))
if total_latent_sections > 4:
latent_paddings = [3] + [2] * (total_latent_sections - 3) + [1, 0]
else: # F1 model
# F1 model doesn't use latent paddings in the same way
# We'll use a fixed approach with just 0 for last section and 1 for others
latent_paddings = [1] * (total_latent_sections - 1) + [0]
# PROMPT BLENDING: Track section index
section_idx = 0
# ADDED: Completely unload all loras from the current transformer
current_transformer = lora_utils.unload_all_loras(current_transformer)
verify_lora_state(current_transformer, "Before loading LoRAs")
# --- LoRA loading and scaling ---
if selected_loras:
for lora_name in selected_loras:
idx = lora_loaded_names.index(lora_name)
lora_file = None
for ext in [".safetensors", ".pt"]:
# Find any file that starts with the lora_name and ends with the extension
matching_files = [f for f in os.listdir(lora_folder_from_settings)
if f.startswith(lora_name) and f.endswith(ext)]
if matching_files:
lora_file = matching_files[0] # Use the first matching file
break
if lora_file:
print(f"Loading LoRA {lora_file} to {model_type} model")
current_transformer = lora_utils.load_lora(current_transformer, lora_folder_from_settings, lora_file)
# Set LoRA strength if provided
if lora_values and idx < len(lora_values):
lora_strength = float(lora_values[idx])
print(f"Setting LoRA {lora_name} strength to {lora_strength}")
# Set scaling for this LoRA by iterating through modules
for name, module in current_transformer.named_modules():
if hasattr(module, 'scaling'):
if isinstance(module.scaling, dict):
# Handle ModuleDict case (PEFT implementation)
if lora_name in module.scaling:
if isinstance(module.scaling[lora_name], torch.Tensor):
module.scaling[lora_name] = torch.tensor(
lora_strength, device=module.scaling[lora_name].device
)
else:
module.scaling[lora_name] = lora_strength
else:
# Handle direct attribute case for scaling if needed
if isinstance(module.scaling, torch.Tensor):
module.scaling = torch.tensor(
lora_strength, device=module.scaling.device
)
else:
module.scaling = lora_strength
else:
print(f"LoRA file for {lora_name} not found!")
# ADDED: Verify LoRA state after loading
verify_lora_state(current_transformer, "After loading LoRAs")
# --- Callback for progress ---
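        # The sampler calls this with d['denoised'] (current latents) and d['i'] (0-based step index);
        # a fast fake VAE decode is arranged into a single preview grid for the UI.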
def callback(d):
preview = d['denoised']
preview = vae_decode_fake(preview)
preview = (preview * 255.0).detach().cpu().numpy().clip(0, 255).astype(np.uint8)
preview = einops.rearrange(preview, 'b c t h w -> (b h) (t w) c')
if stream_to_use.input_queue.top() == 'end':
stream_to_use.output_queue.push(('end', None))
raise KeyboardInterrupt('User ends the task.')
current_step = d['i'] + 1
percentage = int(100.0 * current_step / steps)
current_pos = (total_generated_latent_frames * 4 - 3) / 30
original_pos = total_second_length - current_pos
if current_pos < 0: current_pos = 0
if original_pos < 0: original_pos = 0
hint = f'Sampling {current_step}/{steps}'
if model_type == "Original":
desc = f'Total generated frames: {int(max(0, total_generated_latent_frames * 4 - 3))}, ' \
f'Video length: {max(0, (total_generated_latent_frames * 4 - 3) / 30):.2f} seconds (FPS-30). ' \
f'Current position: {current_pos:.2f}s (original: {original_pos:.2f}s). ' \
f'using prompt: {current_prompt[:256]}...'
else: # F1 model
desc = f'Total generated frames: {int(max(0, total_generated_latent_frames * 4 - 3))}, ' \
f'Video length: {max(0, (total_generated_latent_frames * 4 - 3) / 30):.2f} seconds (FPS-30). ' \
f'Current position: {current_pos:.2f}s. ' \
f'using prompt: {current_prompt[:256]}...'
progress_data = {
'preview': preview,
'desc': desc,
'html': make_progress_bar_html(percentage, hint)
}
if job_stream is not None:
job = job_queue.get_job(job_id)
if job:
job.progress_data = progress_data
stream_to_use.output_queue.push(('progress', (preview, desc, make_progress_bar_html(percentage, hint))))
# --- Main generation loop ---
for latent_padding in latent_paddings:
is_last_section = latent_padding == 0
latent_padding_size = latent_padding * latent_window_size
if stream_to_use.input_queue.top() == 'end':
stream_to_use.output_queue.push(('end', None))
return
current_time_position = (total_generated_latent_frames * 4 - 3) / 30 # in seconds
if current_time_position < 0:
current_time_position = 0.01
# Find the appropriate prompt for this section
current_prompt = prompt_sections[0].prompt # Default to first prompt
for section in prompt_sections:
if section.start_time <= current_time_position and (section.end_time is None or current_time_position < section.end_time):
current_prompt = section.prompt
break
# PROMPT BLENDING: Find if we're in a blend window
blend_alpha = None
prev_prompt = current_prompt
next_prompt = current_prompt
# Only try to blend if we have prompt change indices and multiple sections
if prompt_change_indices and len(prompt_sections) > 1:
for i, (change_idx, prompt) in enumerate(prompt_change_indices):
if section_idx < change_idx:
prev_prompt = prompt_change_indices[i - 1][1] if i > 0 else prompt
next_prompt = prompt
blend_start = change_idx
blend_end = change_idx + blend_sections
if section_idx >= change_idx and section_idx < blend_end:
blend_alpha = (section_idx - change_idx + 1) / blend_sections
break
elif section_idx == change_idx:
# At the exact change, start blending
if i > 0:
prev_prompt = prompt_change_indices[i - 1][1]
next_prompt = prompt
blend_alpha = 1.0 / blend_sections
else:
prev_prompt = prompt
next_prompt = prompt
blend_alpha = None
break
else:
# After last change, no blending
prev_prompt = current_prompt
next_prompt = current_prompt
blend_alpha = None
# Get the encoded prompt for this section
if blend_alpha is not None and prev_prompt != next_prompt:
# Blend embeddings
prev_llama_vec, prev_llama_attention_mask, prev_clip_l_pooler = encoded_prompts[prev_prompt]
next_llama_vec, next_llama_attention_mask, next_clip_l_pooler = encoded_prompts[next_prompt]
llama_vec = (1 - blend_alpha) * prev_llama_vec + blend_alpha * next_llama_vec
llama_attention_mask = prev_llama_attention_mask # usually same
clip_l_pooler = (1 - blend_alpha) * prev_clip_l_pooler + blend_alpha * next_clip_l_pooler
print(f"Blending prompts: '{prev_prompt[:30]}...' -> '{next_prompt[:30]}...', alpha={blend_alpha:.2f}")
else:
llama_vec, llama_attention_mask, clip_l_pooler = encoded_prompts[current_prompt]
original_time_position = total_second_length - current_time_position
if original_time_position < 0:
original_time_position = 0
print(f'latent_padding_size = {latent_padding_size}, is_last_section = {is_last_section}, '
f'time position: {current_time_position:.2f}s (original: {original_time_position:.2f}s), '
f'using prompt: {current_prompt[:60]}...')
if model_type == "Original":
# Original model uses the standard indices approach
indices = torch.arange(0, sum([1, latent_padding_size, latent_window_size, 1, 2, 16])).unsqueeze(0)
clean_latent_indices_pre, blank_indices, latent_indices, clean_latent_indices_post, clean_latent_2x_indices, clean_latent_4x_indices = indices.split([1, latent_padding_size, latent_window_size, 1, 2, 16], dim=1)
clean_latent_indices = torch.cat([clean_latent_indices_pre, clean_latent_indices_post], dim=1)
else: # F1 model
# F1 model uses a different indices approach
                    # Special case: treat a latent_window_size of 4.5 as 5 for the index math
                    effective_window_size = 5 if latent_window_size == 4.5 else int(latent_window_size)
                    indices = torch.arange(0, sum([1, 16, 2, 1, effective_window_size])).unsqueeze(0)
                    clean_latent_indices_start, clean_latent_4x_indices, clean_latent_2x_indices, clean_latent_1x_indices, latent_indices = indices.split([1, 16, 2, 1, effective_window_size], dim=1)
clean_latent_indices = torch.cat([clean_latent_indices_start, clean_latent_1x_indices], dim=1)
print(f"F1 model indices: clean_latent_indices shape={clean_latent_indices.shape}, latent_indices shape={latent_indices.shape}")
if model_type == "Original":
clean_latents_pre = start_latent.to(history_latents)
clean_latents_post, clean_latents_2x, clean_latents_4x = history_latents[:, :, :1 + 2 + 16, :, :].split([1, 2, 16], dim=2)
clean_latents = torch.cat([clean_latents_pre, clean_latents_post], dim=2)
else: # F1 model
# For F1, we take the last frames for clean latents
clean_latents_4x, clean_latents_2x, clean_latents_1x = history_latents[:, :, -sum([16, 2, 1]):, :, :].split([16, 2, 1], dim=2)
# For F1, we prepend the start latent to clean_latents_1x
clean_latents = torch.cat([start_latent.to(history_latents), clean_latents_1x], dim=2)
# Print debug info for F1 model
print(f"F1 model section {section_idx+1}/{total_latent_sections}, latent_padding={latent_padding}")
if not high_vram:
# Unload VAE etc. before loading transformer
unload_complete_models(vae, text_encoder, text_encoder_2, image_encoder)
move_model_to_device_with_memory_preservation(current_transformer, target_device=gpu, preserved_memory_gb=gpu_memory_preservation)
if selected_loras:
move_lora_adapters_to_device(current_transformer, gpu)
if use_teacache:
current_transformer.initialize_teacache(enable_teacache=True, num_steps=steps)
else:
current_transformer.initialize_teacache(enable_teacache=False)
generated_latents = sample_hunyuan(
transformer=current_transformer,
sampler='unipc',
width=width,
height=height,
frames=num_frames,
real_guidance_scale=cfg,
distilled_guidance_scale=gs,
guidance_rescale=rs,
num_inference_steps=steps,
generator=rnd,
prompt_embeds=llama_vec,
prompt_embeds_mask=llama_attention_mask,
prompt_poolers=clip_l_pooler,
negative_prompt_embeds=llama_vec_n,
negative_prompt_embeds_mask=llama_attention_mask_n,
negative_prompt_poolers=clip_l_pooler_n,
device=gpu,
dtype=torch.bfloat16,
image_embeddings=image_encoder_last_hidden_state,
latent_indices=latent_indices,
clean_latents=clean_latents,
clean_latent_indices=clean_latent_indices,
clean_latents_2x=clean_latents_2x,
clean_latent_2x_indices=clean_latent_2x_indices,
clean_latents_4x=clean_latents_4x,
clean_latent_4x_indices=clean_latent_4x_indices,
callback=callback,
)
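            # generated_latents matches the history_latents layout (B, 16, T, H//8, W//8),
            # so it can be concatenated along the time dimension (dim=2) below.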
total_generated_latent_frames += int(generated_latents.shape[2])
if model_type == "Original":
history_latents = torch.cat([generated_latents.to(history_latents), history_latents], dim=2)
else: # F1 model
# For F1, we append new frames to the end
history_latents = torch.cat([history_latents, generated_latents.to(history_latents)], dim=2)
if not high_vram:
if selected_loras:
move_lora_adapters_to_device(current_transformer, cpu)
offload_model_from_device_for_memory_preservation(current_transformer, target_device=gpu, preserved_memory_gb=8)
load_model_as_complete(vae, target_device=gpu)
if model_type == "Original":
real_history_latents = history_latents[:, :, :total_generated_latent_frames, :, :]
else: # F1 model
# For F1, we take frames from the end
real_history_latents = history_latents[:, :, -total_generated_latent_frames:, :, :]
if history_pixels is None:
history_pixels = vae_decode(real_history_latents, vae).cpu()
else:
section_latent_frames = (latent_window_size * 2 + 1) if is_last_section else (latent_window_size * 2)
overlapped_frames = latent_window_size * 4 - 3
if model_type == "Original":
current_pixels = vae_decode(real_history_latents[:, :, :section_latent_frames], vae).cpu()
history_pixels = soft_append_bcthw(current_pixels, history_pixels, overlapped_frames)
else: # F1 model
# For F1, we take frames from the end
print(f"F1 model section {section_idx+1}/{total_latent_sections}, section_latent_frames={section_latent_frames}")
print(f"F1 model real_history_latents shape: {real_history_latents.shape}, taking last {section_latent_frames} frames")
# Get the frames from the end of real_history_latents
current_pixels = vae_decode(real_history_latents[:, :, -section_latent_frames:], vae).cpu()
print(f"F1 model current_pixels shape: {current_pixels.shape}, history_pixels shape: {history_pixels.shape if history_pixels is not None else 'None'}")
# For F1 model, history_pixels is first, current_pixels is second
history_pixels = soft_append_bcthw(history_pixels, current_pixels, overlapped_frames)
print(f"F1 model after append, history_pixels shape: {history_pixels.shape}")
if not high_vram:
unload_complete_models()
output_filename = os.path.join(output_dir, f'{job_id}_{total_generated_latent_frames}.mp4')
save_bcthw_as_mp4(history_pixels, output_filename, fps=30, crf=mp4_crf)
print(f'Decoded. Current latent shape {real_history_latents.shape}; pixel shape {history_pixels.shape}')
stream_to_use.output_queue.push(('file', output_filename))
if is_last_section:
break
section_idx += 1 # PROMPT BLENDING: increment section index
# ADDED: Unload all LoRAs after generation completed
if selected_loras:
print("Unloading all LoRAs after generation completed")
current_transformer = lora_utils.unload_all_loras(current_transformer)
verify_lora_state(current_transformer, "After generation completed")
import gc
gc.collect()
if torch.cuda.is_available():
torch.cuda.empty_cache()
    except:
        # Bare except is intentional here: it also catches the KeyboardInterrupt raised by the
        # progress callback when the user cancels, so the LoRA/memory cleanup below still runs.
traceback.print_exc()
# ADDED: Unload all LoRAs after error
if current_transformer is not None and selected_loras:
print("Unloading all LoRAs after error")
current_transformer = lora_utils.unload_all_loras(current_transformer)
verify_lora_state(current_transformer, "After error")
import gc
gc.collect()
if torch.cuda.is_available():
torch.cuda.empty_cache()
stream_to_use.output_queue.push(('error', f"Error during generation: {traceback.format_exc()}"))
if not high_vram:
# Ensure all models including the potentially active transformer are unloaded on error
unload_complete_models(
text_encoder, text_encoder_2, image_encoder, vae, current_transformer
)
if clean_up_videos:
try:
video_files = [
f for f in os.listdir(output_dir)
if f.startswith(f"{job_id}_") and f.endswith(".mp4")
]
print(f"Video files found for cleanup: {video_files}")
if video_files:
def get_frame_count(filename):
try:
# Handles filenames like jobid_123.mp4
return int(filename.replace(f"{job_id}_", "").replace(".mp4", ""))
except Exception:
return -1
video_files_sorted = sorted(video_files, key=get_frame_count)
print(f"Sorted video files: {video_files_sorted}")
final_video = video_files_sorted[-1]
for vf in video_files_sorted[:-1]:
full_path = os.path.join(output_dir, vf)
try:
os.remove(full_path)
print(f"Deleted intermediate video: {full_path}")
except Exception as e:
print(f"Failed to delete {full_path}: {e}")
except Exception as e:
print(f"Error during video cleanup: {e}")
# ADDED: Final verification of LoRA state
verify_lora_state(current_transformer, "Worker end")
stream_to_use.output_queue.push(('end', None))
return
# Set the worker function for the job queue
job_queue.set_worker_function(worker)
def get_duration( model_type,
input_image,
prompt_text,
n_prompt,
seed,
total_second_length,
latent_window_size,
steps,
cfg,
gs,
rs,
gpu_memory_preservation,
use_teacache,
mp4_crf,
save_metadata,
blend_sections,
latent_type,
clean_up_videos,
selected_loras,
resolutionW,
resolutionH,
lora_loaded_names,
*lora_values):
return total_second_length * 60
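# Used by the @spaces.GPU decorator below: the requested GPU time budget scales with the clip length
# (roughly 60 seconds of GPU time per requested second of video).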
@spaces.GPU(duration=get_duration)
def process(
model_type,
input_image,
prompt_text,
n_prompt,
seed,
total_second_length,
latent_window_size,
steps,
cfg,
gs,
rs,
gpu_memory_preservation,
use_teacache,
mp4_crf,
save_metadata,
blend_sections,
latent_type,
clean_up_videos,
selected_loras,
resolutionW,
resolutionH,
lora_loaded_names,
*lora_values
):
    # If no input image is provided, create a default image based on the selected latent_type
if input_image is None:
default_height, default_width = resolutionH, resolutionW
if latent_type == "White":
# Create a white image
input_image = np.ones((default_height, default_width, 3), dtype=np.uint8) * 255
print("No input image provided. Using a blank white image.")
elif latent_type == "Noise":
# Create a noise image
input_image = np.random.randint(0, 256, (default_height, default_width, 3), dtype=np.uint8)
print("No input image provided. Using a random noise image.")
elif latent_type == "Green Screen":
# Create a green screen image with standard chroma key green (0, 177, 64)
input_image = np.zeros((default_height, default_width, 3), dtype=np.uint8)
input_image[:, :, 1] = 177 # Green channel
input_image[:, :, 2] = 64 # Blue channel
# Red channel remains 0
print("No input image provided. Using a standard chroma key green screen.")
else: # Default to "Black" or any other value
# Create a black image
input_image = np.zeros((default_height, default_width, 3), dtype=np.uint8)
print(f"No input image provided. Using a blank black image (latent_type: {latent_type}).")
# Create job parameters
job_params = {
'model_type': model_type,
'input_image': input_image.copy(), # Make a copy to avoid reference issues
'prompt_text': prompt_text,
'n_prompt': n_prompt,
'seed': seed,
'total_second_length': total_second_length,
'latent_window_size': latent_window_size,
'latent_type': latent_type,
'steps': steps,
'cfg': cfg,
'gs': gs,
'rs': rs,
'blend_sections': blend_sections,
'gpu_memory_preservation': gpu_memory_preservation,
'use_teacache': use_teacache,
'mp4_crf': mp4_crf,
'save_metadata': save_metadata,
'selected_loras': selected_loras,
'clean_up_videos': clean_up_videos,
'output_dir': settings.get("output_dir"),
'metadata_dir': settings.get("metadata_dir"),
'resolutionW': resolutionW, # Add resolution parameter
'resolutionH': resolutionH,
'lora_loaded_names': lora_loaded_names
}
# Add LoRA values if provided - extract them from the tuple
if lora_values:
# Convert tuple to list
lora_values_list = list(lora_values)
job_params['lora_values'] = lora_values_list
# Add job to queue
job_id = job_queue.add_job(job_params)
print(f"Added job {job_id} to queue")
queue_status = update_queue_status()
# Return immediately after adding to queue
return None, job_id, None, '', f'Job added to queue. Job ID: {job_id}', gr.update(interactive=True), gr.update(interactive=True)
def end_process():
"""Cancel the current running job and update the queue status"""
print("Cancelling current job")
with job_queue.lock:
if job_queue.current_job:
job_id = job_queue.current_job.id
print(f"Cancelling job {job_id}")
# Send the end signal to the job's stream
if job_queue.current_job.stream:
job_queue.current_job.stream.input_queue.push('end')
# Mark the job as cancelled
job_queue.current_job.status = JobStatus.CANCELLED
job_queue.current_job.completed_at = time.time() # Set completion time
# Force an update to the queue status
return update_queue_status()
def update_queue_status():
"""Update queue status and refresh job positions"""
jobs = job_queue.get_all_jobs()
for job in jobs:
if job.status == JobStatus.PENDING:
job.queue_position = job_queue.get_queue_position(job.id)
# Make sure to update current running job info
if job_queue.current_job:
# Make sure the running job is showing status = RUNNING
job_queue.current_job.status = JobStatus.RUNNING
return format_queue_status(jobs)
def monitor_job(job_id):
"""
Monitor a specific job and update the UI with the latest video segment as soon as it's available.
"""
if not job_id:
yield None, None, None, '', 'No job ID provided', gr.update(interactive=True), gr.update(interactive=True)
return
last_video = None # Track the last video file shown
while True:
job = job_queue.get_job(job_id)
if not job:
yield None, job_id, None, '', 'Job not found', gr.update(interactive=True), gr.update(interactive=True)
return
# If a new video file is available, yield it immediately
if job.result and job.result != last_video:
last_video = job.result
# You can also update preview/progress here if desired
yield last_video, job_id, gr.update(visible=True), '', '', gr.update(interactive=True), gr.update(interactive=True)
# Handle job status and progress
if job.status == JobStatus.PENDING:
position = job_queue.get_queue_position(job_id)
yield last_video, job_id, gr.update(visible=True), '', f'Waiting in queue. Position: {position}', gr.update(interactive=True), gr.update(interactive=True)
elif job.status == JobStatus.RUNNING:
if job.progress_data and 'preview' in job.progress_data:
preview = job.progress_data.get('preview')
desc = job.progress_data.get('desc', '')
html = job.progress_data.get('html', '')
yield last_video, job_id, gr.update(visible=True, value=preview), desc, html, gr.update(interactive=True), gr.update(interactive=True)
else:
yield last_video, job_id, gr.update(visible=True), '', 'Processing...', gr.update(interactive=True), gr.update(interactive=True)
elif job.status == JobStatus.COMPLETED:
# Show the final video
yield last_video, job_id, gr.update(visible=True), '', '', gr.update(interactive=True), gr.update(interactive=True)
break
elif job.status == JobStatus.FAILED:
yield last_video, job_id, gr.update(visible=True), '', f'Error: {job.error}', gr.update(interactive=True), gr.update(interactive=True)
break
elif job.status == JobStatus.CANCELLED:
yield last_video, job_id, gr.update(visible=True), '', 'Job cancelled', gr.update(interactive=True), gr.update(interactive=True)
break
# Wait a bit before checking again
time.sleep(0.5)
# Set Gradio temporary directory from settings
os.environ["GRADIO_TEMP_DIR"] = settings.get("gradio_temp_dir")
# Create the interface
interface = create_interface(
process_fn=process,
monitor_fn=monitor_job,
end_process_fn=end_process,
update_queue_status_fn=update_queue_status,
load_lora_file_fn=load_lora_file,
job_queue=job_queue,
settings=settings,
lora_names=lora_names # Explicitly pass the found LoRA names
)
# Launch the interface
# NOTE: the parsed CLI arguments (--server, --port, --share, --inbrowser) are currently bypassed;
# the interface is always launched with share=True. Uncomment the block below to honor them instead.
# interface.launch(
#     server_name=args.server,
#     server_port=args.port,
#     share=args.share,
#     inbrowser=args.inbrowser
# )
interface.launch(share=True)