ozilion's picture
Update app.py
a0415fd verified
import gradio as gr
import os
import torch
import gc
from typing import Optional
# Hugging Face Spaces is detected by the presence of the SPACE_ID
# environment variable.
IS_SPACES = "SPACE_ID" in os.environ
def check_gpu_memory():
    """Return the total memory of CUDA device 0, in units of 1024**3 bytes.

    Returns 0 when CUDA is not available.
    """
    if not torch.cuda.is_available():
        return 0
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    return total_bytes / 1024**3
def load_model():
    """Try to load the HunyuanVideo model and its tokenizer.

    Returns:
        A ``(model, tokenizer, device)`` tuple where ``device`` is ``"cuda"``
        or ``"cpu"``. If anything raises while loading, the error is printed
        and ``(None, None, "cpu")`` is returned instead.
    """
    try:
        # On Spaces, log what GPU memory is reported before loading.
        if IS_SPACES:
            print("Running on Hugging Face Spaces")
            reported_gb = check_gpu_memory()
            print(f"Available GPU memory: {reported_gb:.1f} GB")

        # Imported inside the try so an import failure is reported by the
        # handler below like any other loading error.
        from transformers import AutoModel, AutoTokenizer

        model_name = "tencent/HunyuanVideo"

        # Pick CUDA only when a GPU is present and check_gpu_memory()
        # reports more than 8; otherwise fall back to the CPU.
        if torch.cuda.is_available() and check_gpu_memory() > 8:
            device = "cuda"
        else:
            device = "cpu"
        print(f"Using device: {device}")

        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Half precision and automatic device placement on GPU; full
        # precision and no device map on CPU.
        on_gpu = device == "cuda"
        load_kwargs = {
            "torch_dtype": torch.float16 if on_gpu else torch.float32,
            "device_map": "auto" if on_gpu else None,
            "low_cpu_mem_usage": True,
        }
        model = AutoModel.from_pretrained(model_name, **load_kwargs)
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None, "cpu"

    return model, tokenizer, device
# Load the model once, when this module is executed. If loading fails,
# load_model() returns (None, None, "cpu"), so MODEL and TOKENIZER may be None.
MODEL, TOKENIZER, DEVICE = load_model()
def _parse_resolution(resolution: str) -> tuple:
    """Split a ``"<width>x<height>"`` string into two positive ints.

    Raises:
        ValueError: If the separator is missing, a side is not an integer,
            or either dimension is not positive.
    """
    width_text, separator, height_text = resolution.lower().partition("x")
    if not separator:
        raise ValueError(f"missing 'x' separator in {resolution!r}")
    width, height = int(width_text), int(height_text)
    if width <= 0 or height <= 0:
        raise ValueError(f"dimensions must be positive in {resolution!r}")
    return width, height


def generate_video(prompt: str, duration: int = 5, resolution: str = "512x512") -> str:
    """Validate a text-to-video request and return a status message.

    Actual video generation is not implemented yet: once the inputs pass
    validation, the function only echoes the request back to the caller.

    Args:
        prompt: Text description of the video; must contain non-whitespace text.
        duration: Requested length in seconds; must be between 1 and 10.
        resolution: Frame size formatted as ``"<width>x<height>"``.

    Returns:
        A human-readable status string. Failures (model not loaded, invalid
        input, unexpected error) are reported in the returned text, prefixed
        with "❌", rather than raised.
    """
    if MODEL is None:
        return "❌ Model not loaded. This might be due to memory limitations on Hugging Face Spaces."

    try:
        # Validate the request first so a bad input never triggers the
        # (comparatively expensive) memory cleanup below.
        if not prompt or not prompt.strip():
            return "❌ Please enter a valid prompt."
        # The chained comparison also rejects NaN.
        if not 1 <= duration <= 10:
            return "❌ Duration must be between 1-10 seconds."
        try:
            width, height = _parse_resolution(resolution)
        except ValueError:
            return f"❌ Invalid resolution {resolution!r}. Expected WIDTHxHEIGHT, e.g. 512x512."

        # Collect garbage before emptying the CUDA cache: empty_cache() can
        # only release blocks that no live tensor is still holding.
        gc.collect()
        if DEVICE == "cuda":
            torch.cuda.empty_cache()

        # This is where the actual video generation would be implemented.
        # For now, return a placeholder message describing the request.
        report_lines = [
            "",
            "✅ Video generation request processed:",
            f"📝 Prompt: {prompt}",
            f"⏱️ Duration: {duration} seconds",
            f"📐 Resolution: {width}x{height}",
            f"🖥️ Device: {DEVICE}",
            "Note: Actual video generation implementation needed.",
            "The model is loaded and ready for inference.",
            "",
        ]
        return "\n".join(report_lines)
    except Exception as e:
        return f"❌ Error during generation: {e}"
def get_system_info():
    """Build a Markdown summary of the runtime environment for debugging.

    Returns:
        A Markdown string listing the Python and PyTorch versions, CUDA
        availability, the value reported by ``check_gpu_memory()``, whether
        the app is running on Spaces, the selected device, and whether the
        model was loaded.
    """
    # Use the sys module directly; ``os.sys`` is an undocumented
    # implementation detail of the os module.
    import sys

    python_version = sys.version.split()[0]
    model_status = "✅" if MODEL is not None else "❌"

    info_lines = [
        "",
        "🖥️ **System Information:**",
        f"- Python: {python_version}",
        f"- PyTorch: {torch.__version__}",
        f"- CUDA Available: {torch.cuda.is_available()}",
        f"- GPU Memory: {check_gpu_memory():.1f} GB",
        f"- Running on Spaces: {IS_SPACES}",
        f"- Device: {DEVICE}",
        f"- Model Loaded: {model_status}",
        "",
    ]
    return "\n".join(info_lines)
# Build the Gradio interface: one tab for submitting generation requests
# and one tab for inspecting the runtime environment.
with gr.Blocks(title="HunyuanVideo Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎬 HunyuanVideo Text-to-Video Generator")
    gr.Markdown("Generate videos from text descriptions using the HunyuanVideo model.")

    with gr.Tab("Generate Video"):
        with gr.Row():
            # Left column: request inputs.
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    label="📝 Video Description",
                    placeholder="A cat playing with a ball of yarn in a sunny garden...",
                    lines=3,
                    max_lines=5,
                )
                with gr.Row():
                    # Slider bounds mirror the 1-10 second range that
                    # generate_video accepts.
                    duration_slider = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=5,
                        step=1,
                        label="⏱️ Duration (seconds)",
                    )
                    resolution_dropdown = gr.Dropdown(
                        choices=["256x256", "512x512", "768x768", "1024x1024"],
                        value="512x512",
                        label="📐 Resolution",
                    )
                generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")

            # Right column: status text returned by generate_video.
            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="📋 Output",
                    lines=10,
                    show_copy_button=True,
                )

        # Event handler: pass the three inputs to generate_video and show
        # its returned string in the output box.
        generate_btn.click(
            fn=generate_video,
            inputs=[prompt_input, duration_slider, resolution_dropdown],
            outputs=output_text,
        )

        # Example prompts that pre-fill the three inputs.
        gr.Examples(
            examples=[
                ["A beautiful sunset over a calm ocean with gentle waves", 5, "512x512"],
                ["A cat gracefully jumping between rooftops in a medieval town", 7, "768x768"],
                ["Cherry blossoms falling in a Japanese garden", 4, "512x512"],
                ["A spacecraft flying through a colorful nebula", 8, "1024x1024"],
            ],
            inputs=[prompt_input, duration_slider, resolution_dropdown],
        )

    with gr.Tab("System Info"):
        info_button = gr.Button("🔍 Check System Info")
        info_output = gr.Markdown()
        # Render the Markdown produced by get_system_info on demand.
        info_button.click(
            fn=get_system_info,
            outputs=info_output,
        )
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    launch_options = {
        "share": False,  # Hugging Face Spaces handles sharing
        "server_name": "0.0.0.0",  # Important for Spaces
        "server_port": 7860,  # Default port for Spaces
        "show_error": True,
    }
    demo.launch(**launch_options)