Spaces:

ozilion
/

tencent-HunyuanVideo

Sleeping

App Files Files Community

tencent-HunyuanVideo / app.py

ozilion

Update app.py

a0415fd verified 2 months ago

raw

history blame contribute delete

6.18 kB

	import gradio as gr
	import os
	import torch
	import gc
	from typing import Optional

	# True when the SPACE_ID environment variable is set, which this app treats
	# as the signal that it is running on Hugging Face Spaces.
	IS_SPACES = "SPACE_ID" in os.environ

	def check_gpu_memory() -> float:
	    """Return the total memory of CUDA device 0 in GiB.

	    The value is the device's total capacity as reported by
	    ``torch.cuda.get_device_properties(0).total_memory``; it is not the
	    amount of memory that is currently free.

	    Returns:
	        Total memory of GPU 0 in GiB, or ``0.0`` when CUDA is unavailable.
	    """
	    if not torch.cuda.is_available():
	        # Always hand back a float so callers can format with ``:.1f``
	        # and compare against thresholds without caring about the branch.
	        return 0.0
	    total_bytes = torch.cuda.get_device_properties(0).total_memory
	    return total_bytes / 1024**3

	def load_model(model_name: str = "tencent/HunyuanVideo", min_gpu_memory_gb: float = 8):
	    """Load the model and tokenizer, degrading to a "not loaded" result on error.

	    Args:
	        model_name: Repository id handed to both ``from_pretrained`` calls.
	        min_gpu_memory_gb: CUDA is selected only when it is available and
	            ``check_gpu_memory()`` reports strictly more than this many GB;
	            otherwise the model is loaded for CPU.

	    Returns:
	        A ``(model, tokenizer, device)`` tuple where ``device`` is ``"cuda"``
	        or ``"cpu"``. If anything raises while importing ``transformers`` or
	        loading the weights, the error is printed and
	        ``(None, None, "cpu")`` is returned instead of propagating.
	    """
	    try:
	        # Query the GPU once and reuse the figure for both the log line and
	        # the device decision.
	        gpu_memory = check_gpu_memory()

	        # For Hugging Face Spaces, we need to be careful with memory
	        if IS_SPACES:
	            print("Running on Hugging Face Spaces")
	            print(f"Available GPU memory: {gpu_memory:.1f} GB")

	        # Imported inside the try so a missing or broken transformers
	        # install is reported through the same fallback path.
	        from transformers import AutoModel, AutoTokenizer

	        # Use CPU if no GPU or limited memory
	        use_cuda = torch.cuda.is_available() and gpu_memory > min_gpu_memory_gb
	        device = "cuda" if use_cuda else "cpu"
	        print(f"Using device: {device}")

	        tokenizer = AutoTokenizer.from_pretrained(model_name)

	        # NOTE(review): confirm that this repository can actually be loaded
	        # through AutoModel/AutoTokenizer; if it cannot, the failure is
	        # caught below and the app starts with MODEL set to None.
	        model = AutoModel.from_pretrained(
	            model_name,
	            # Half precision only on GPU; CPU keeps float32.
	            torch_dtype=torch.float16 if use_cuda else torch.float32,
	            device_map="auto" if use_cuda else None,
	            low_cpu_mem_usage=True,
	        )

	        return model, tokenizer, device

	    except Exception as e:
	        # Deliberate best-effort: the UI must still start without a model.
	        print(f"Error loading model: {e}")
	        return None, None, "cpu"

	# Initialize model
	# Runs once at import time. MODEL and DEVICE are read as module-level
	# globals by generate_video and get_system_info; all three names are bound
	# even when loading fails, because load_model then returns
	# (None, None, "cpu") instead of raising.
	MODEL, TOKENIZER, DEVICE = load_model()

	def generate_video(prompt: str, duration: int = 5, resolution: str = "512x512") -> str:
	    """Validate a text-to-video request and return a status message.

	    No video is produced yet: once the inputs pass validation the function
	    returns a placeholder summary of the request.

	    Args:
	        prompt: Text description of the desired video; must contain
	            non-whitespace characters.
	        duration: Requested length in seconds; must be within 1-10.
	        resolution: Frame size written as ``WIDTHxHEIGHT`` with positive
	            integers, e.g. ``"512x512"``.

	    Returns:
	        A human-readable message: an error line starting with "❌" when the
	        model is not loaded, an input is invalid, or an unexpected exception
	        occurs; otherwise the placeholder summary starting with "✅".
	    """

	    if MODEL is None:
	        return "❌ Model not loaded. This might be due to memory limitations on Hugging Face Spaces."

	    try:
	        # Parse resolution; reject malformed values with a specific message
	        # instead of leaking the raw unpacking/int() exception text.
	        try:
	            width, height = map(int, resolution.split("x"))
	        except (AttributeError, ValueError):
	            return "❌ Resolution must be formatted as WIDTHxHEIGHT, e.g. 512x512."
	        if width <= 0 or height <= 0:
	            return "❌ Resolution must be formatted as WIDTHxHEIGHT, e.g. 512x512."

	        # Basic validation (a None prompt is treated like an empty one)
	        if not prompt or not prompt.strip():
	            return "❌ Please enter a valid prompt."

	        if not 1 <= duration <= 10:
	            return "❌ Duration must be between 1-10 seconds."

	        # Free GPU memory only for requests that passed validation. Collect
	        # garbage first so memory held by unreachable objects can be
	        # released by empty_cache() as well.
	        if DEVICE == "cuda":
	            gc.collect()
	            torch.cuda.empty_cache()

	        # This is where you would implement the actual video generation
	        # For now, return a placeholder message
	        return f"""
	✅ Video generation request processed:

	📝 Prompt: {prompt}
	⏱️ Duration: {duration} seconds
	📐 Resolution: {resolution}
	🖥️ Device: {DEVICE}

	Note: Actual video generation implementation needed.
	The model is loaded and ready for inference.
	"""

	    except Exception as e:
	        # UI boundary: report the failure in the output box rather than
	        # raising into Gradio.
	        return f"❌ Error during generation: {str(e)}"

	def get_system_info() -> str:
	    """Build a text report of the runtime environment for debugging.

	    Returns:
	        A multi-line string listing the Python and PyTorch versions, CUDA
	        availability, the GPU memory figure from ``check_gpu_memory()``,
	        whether the app runs on Spaces, the selected device, and whether
	        the model was loaded.
	    """
	    # Use the sys module directly instead of reaching through ``os.sys``,
	    # which is not a documented attribute of the os module.
	    import sys

	    info = f"""
	🖥️ System Information:
	- Python: {sys.version.split()[0]}
	- PyTorch: {torch.__version__}
	- CUDA Available: {torch.cuda.is_available()}
	- GPU Memory: {check_gpu_memory():.1f} GB
	- Running on Spaces: {IS_SPACES}
	- Device: {DEVICE}
	- Model Loaded: {'✅' if MODEL is not None else '❌'}
	"""
	    return info

	# Build the two-tab Gradio UI: a request form wired to generate_video and a
	# diagnostics tab wired to get_system_info.
	with gr.Blocks(title="HunyuanVideo Generator", theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# 🎬 HunyuanVideo Text-to-Video Generator")
	    gr.Markdown("Generate videos from text descriptions using the HunyuanVideo model.")

	    with gr.Tab("Generate Video"):
	        with gr.Row():
	            # Left column: request form.
	            with gr.Column(scale=1):
	                prompt_input = gr.Textbox(
	                    label="📝 Video Description",
	                    placeholder="A cat playing with a ball of yarn in a sunny garden...",
	                    lines=3,
	                    max_lines=5,
	                )

	                with gr.Row():
	                    duration_slider = gr.Slider(
	                        minimum=1, maximum=10, value=5, step=1,
	                        label="⏱️ Duration (seconds)",
	                    )
	                    resolution_dropdown = gr.Dropdown(
	                        choices=["256x256", "512x512", "768x768", "1024x1024"],
	                        value="512x512",
	                        label="📐 Resolution",
	                    )

	                generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")

	            # Right column: status text returned by generate_video.
	            with gr.Column(scale=1):
	                output_text = gr.Textbox(label="📋 Output", lines=10, show_copy_button=True)

	        # The button and the example rows feed the same three components,
	        # in the order of generate_video's parameters.
	        _request_inputs = [prompt_input, duration_slider, resolution_dropdown]

	        generate_btn.click(fn=generate_video, inputs=_request_inputs, outputs=output_text)

	        # Each example row is (prompt, duration, resolution).
	        _example_requests = [
	            ["A beautiful sunset over a calm ocean with gentle waves", 5, "512x512"],
	            ["A cat gracefully jumping between rooftops in a medieval town", 7, "768x768"],
	            ["Cherry blossoms falling in a Japanese garden", 4, "512x512"],
	            ["A spacecraft flying through a colorful nebula", 8, "1024x1024"],
	        ]
	        gr.Examples(examples=_example_requests, inputs=_request_inputs)

	    with gr.Tab("System Info"):
	        info_button = gr.Button("🔍 Check System Info")
	        info_output = gr.Markdown()

	        # No inputs: the report is computed from module state on each click.
	        info_button.click(fn=get_system_info, outputs=info_output)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    launch_options = {
	        "share": False,  # Hugging Face Spaces handles sharing
	        "server_name": "0.0.0.0",  # Important for Spaces
	        "server_port": 7860,  # Default port for Spaces
	        "show_error": True,
	    }
	    demo.launch(**launch_options)