Spaces:
Sleeping
Sleeping
File size: 6,178 Bytes
bac4f19 a0415fd bac4f19 a0415fd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
import gradio as gr
import os
import torch
import gc
from typing import Optional
# The presence of a SPACE_ID environment variable (even an empty one) is
# treated as the marker of a Hugging Face Spaces deployment.
IS_SPACES = "SPACE_ID" in os.environ
def check_gpu_memory():
    """Return the total memory of CUDA device 0 in GiB, or 0 without CUDA.

    The value is the device's total capacity as reported by its properties,
    not the amount that is currently free.
    """
    if not torch.cuda.is_available():
        return 0
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    return total_bytes / 1024**3
def load_model():
    """Try to load the HunyuanVideo model and its tokenizer.

    Returns:
        A ``(model, tokenizer, device)`` tuple. ``device`` is ``"cuda"`` only
        when CUDA is available and ``check_gpu_memory()`` reports more than
        8 GiB; otherwise it is ``"cpu"``. If anything raises while loading,
        the error is printed and ``(None, None, "cpu")`` is returned.
    """
    try:
        # On Spaces, log the GPU size up front to help diagnose memory issues.
        if IS_SPACES:
            print("Running on Hugging Face Spaces")
            print(f"Available GPU memory: {check_gpu_memory():.1f} GB")

        # Imported inside the try block, so an ImportError is also reported
        # by the handler below instead of aborting module import.
        from transformers import AutoModel, AutoTokenizer

        checkpoint = "tencent/HunyuanVideo"

        # Fall back to the CPU when there is no GPU or it is too small.
        use_gpu = torch.cuda.is_available() and check_gpu_memory() > 8
        device = "cuda" if use_gpu else "cpu"
        print(f"Using device: {device}")

        tokenizer = AutoTokenizer.from_pretrained(checkpoint)

        # Half precision and automatic placement on GPU; full precision and
        # no device map on CPU.
        if use_gpu:
            dtype, placement = torch.float16, "auto"
        else:
            dtype, placement = torch.float32, None
        model = AutoModel.from_pretrained(
            checkpoint,
            torch_dtype=dtype,
            device_map=placement,
            low_cpu_mem_usage=True,
        )
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None, "cpu"
    return model, tokenizer, device
# Initialize model
# Runs once at import time. load_model() reports a failed load by returning
# (None, None, "cpu"), so MODEL and TOKENIZER may be None here;
# generate_video() checks MODEL before doing any work.
MODEL, TOKENIZER, DEVICE = load_model()
def generate_video(prompt: str, duration: int = 5, resolution: str = "512x512") -> str:
    """Validate a text-to-video request and return a status message.

    No video is produced yet: on success the function returns a placeholder
    summary of the request.

    Args:
        prompt: Text description of the desired video; must not be blank.
        duration: Requested length in seconds; must be within 1-10.
        resolution: ``"<width>x<height>"`` string, e.g. ``"512x512"``.

    Returns:
        A human-readable message: the request summary, or an error message
        when the model is not loaded, validation fails, or an exception is
        raised while handling the request.
    """
    if MODEL is None:
        return "β Model not loaded. This might be due to memory limitations on Hugging Face Spaces."

    try:
        # Release cached GPU memory before working on a new request.
        if DEVICE == "cuda":
            torch.cuda.empty_cache()
            gc.collect()

        # Unpacking into exactly two ints doubles as a format check: a
        # malformed resolution raises and is reported by the handler below.
        width, height = (int(part) for part in resolution.split('x'))

        cleaned_prompt = prompt.strip()
        if not cleaned_prompt:
            return "β Please enter a valid prompt."

        if duration < 1 or duration > 10:
            return "β Duration must be between 1-10 seconds."

        # Placeholder until the actual generation pipeline is implemented.
        summary = f"""
β
Video generation request processed:
π Prompt: {prompt}
β±οΈ Duration: {duration} seconds
π Resolution: {resolution}
π₯οΈ Device: {DEVICE}
Note: Actual video generation implementation needed.
The model is loaded and ready for inference.
"""
        return summary
    except Exception as e:
        return f"β Error during generation: {e}"
def get_system_info():
    """Return a Markdown summary of the runtime environment for debugging.

    The report lists the Python and PyTorch versions, CUDA availability, the
    GPU memory reported by ``check_gpu_memory()``, whether the app is running
    on Spaces, the selected device, and whether the model loaded.

    Returns:
        A Markdown-formatted string.
    """
    # Use the sys module directly: ``os.sys`` only works because the os
    # module happens to import sys, and it is not a documented attribute.
    import sys

    # Computed outside the f-string. The original literal was split across
    # two lines (a syntax error) and both markers had degraded to the same
    # glyph, which made "loaded" and "not loaded" indistinguishable.
    model_status = '✅' if MODEL is not None else '❌'

    info = f"""
π₯οΈ **System Information:**
- Python: {sys.version.split()[0]}
- PyTorch: {torch.__version__}
- CUDA Available: {torch.cuda.is_available()}
- GPU Memory: {check_gpu_memory():.1f} GB
- Running on Spaces: {IS_SPACES}
- Device: {DEVICE}
- Model Loaded: {model_status}
"""
    return info
# Build the Gradio UI: one tab for submitting generation requests and one
# for inspecting the runtime environment.
with gr.Blocks(title="HunyuanVideo Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π¬ HunyuanVideo Text-to-Video Generator")
    gr.Markdown("Generate videos from text descriptions using the HunyuanVideo model.")

    with gr.Tab("Generate Video"):
        with gr.Row():
            # Left column: request parameters and the submit button.
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    lines=3,
                    max_lines=5,
                    label="π Video Description",
                    placeholder="A cat playing with a ball of yarn in a sunny garden...",
                )
                with gr.Row():
                    duration_slider = gr.Slider(
                        label="β±οΈ Duration (seconds)",
                        minimum=1,
                        maximum=10,
                        step=1,
                        value=5,
                    )
                    resolution_dropdown = gr.Dropdown(
                        label="π Resolution",
                        choices=["256x256", "512x512", "768x768", "1024x1024"],
                        value="512x512",
                    )
                generate_btn = gr.Button("π¬ Generate Video", variant="primary", size="lg")

            # Right column: textual result of the request.
            with gr.Column(scale=1):
                output_text = gr.Textbox(label="π Output", lines=10, show_copy_button=True)

        # Inputs are passed positionally, in generate_video's parameter order.
        generate_btn.click(
            fn=generate_video,
            inputs=[prompt_input, duration_slider, resolution_dropdown],
            outputs=output_text,
        )

        # Clickable sample requests that pre-fill the three input widgets.
        gr.Examples(
            inputs=[prompt_input, duration_slider, resolution_dropdown],
            examples=[
                ["A beautiful sunset over a calm ocean with gentle waves", 5, "512x512"],
                ["A cat gracefully jumping between rooftops in a medieval town", 7, "768x768"],
                ["Cherry blossoms falling in a Japanese garden", 4, "512x512"],
                ["A spacecraft flying through a colorful nebula", 8, "1024x1024"],
            ],
        )

    with gr.Tab("System Info"):
        info_button = gr.Button("π Check System Info")
        info_output = gr.Markdown()
        # No inputs: the report is built from module-level state only.
        info_button.click(fn=get_system_info, outputs=info_output)
# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    launch_options = {
        "share": False,  # Hugging Face Spaces handles sharing
        "server_name": "0.0.0.0",  # Important for Spaces
        "server_port": 7860,  # Default port for Spaces
        "show_error": True,
    }
    demo.launch(**launch_options)