File size: 6,178 Bytes
bac4f19
a0415fd
 
 
 
bac4f19
a0415fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import gradio as gr
import os
import torch
import gc
from typing import Optional

# Treat the process as running on Hugging Face Spaces whenever the
# SPACE_ID environment variable is set (to any value, even an empty one).
IS_SPACES = "SPACE_ID" in os.environ

def check_gpu_memory():
    """Return the total memory of CUDA device 0 in GiB, or 0 without CUDA.

    Note that this reports the device's total memory, not the amount that is
    currently free.
    """
    if not torch.cuda.is_available():
        return 0
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    return total_bytes / 1024**3

def load_model():
    """Try to load the HunyuanVideo model and its tokenizer.

    Returns:
        A ``(model, tokenizer, device)`` tuple where ``device`` is ``"cuda"``
        or ``"cpu"``. If anything raises while loading, the error is printed
        and ``(None, None, "cpu")`` is returned instead.
    """
    try:
        # On Spaces, log the GPU memory reported by check_gpu_memory() first
        if IS_SPACES:
            print("Running on Hugging Face Spaces")
            gpu_memory = check_gpu_memory()
            print(f"Available GPU memory: {gpu_memory:.1f} GB")

        # Imported inside the try block so that an import failure is handled
        # by the except clause below like any other loading error
        from transformers import AutoModel, AutoTokenizer

        model_name = "tencent/HunyuanVideo"

        # CUDA is only selected when a GPU reporting more than 8 GB is present
        use_cuda = torch.cuda.is_available() and check_gpu_memory() > 8
        device = "cuda" if use_cuda else "cpu"
        print(f"Using device: {device}")

        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Half precision with automatic placement on GPU, full precision on CPU
        if device == "cuda":
            dtype, placement = torch.float16, "auto"
        else:
            dtype, placement = torch.float32, None

        model = AutoModel.from_pretrained(
            model_name,
            torch_dtype=dtype,
            device_map=placement,
            low_cpu_mem_usage=True,
        )

    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None, "cpu"

    return model, tokenizer, device

# Load the model once, at import time. load_model() returns
# (None, None, "cpu") on failure, so MODEL and TOKENIZER may be None here.
MODEL, TOKENIZER, DEVICE = load_model()

def generate_video(prompt: str, duration: int = 5, resolution: str = "512x512") -> str:
    """Validate a text-to-video request and return a status message.

    Actual video generation is not implemented yet: on success this returns a
    placeholder summary of the request.

    Args:
        prompt: Text description of the video; must contain non-whitespace text.
        duration: Requested length in seconds; must be between 1 and 10.
        resolution: Frame size formatted as ``"<width>x<height>"``,
            e.g. ``"512x512"``.

    Returns:
        A human-readable message: a summary of the request on success, or a
        string starting with "❌" explaining why it was rejected or failed.
        The function does not raise; unexpected errors are reported in the
        returned string.
    """
    if MODEL is None:
        return "❌ Model not loaded. This might be due to memory limitations on Hugging Face Spaces."

    try:
        # Validate the cheap inputs first so bad requests are rejected
        # before any GPU housekeeping is done.
        if not prompt or not prompt.strip():
            return "❌ Please enter a valid prompt."

        if duration < 1 or duration > 10:
            return "❌ Duration must be between 1-10 seconds."

        # Parse the resolution and report a malformed value explicitly
        # instead of leaking a raw ValueError message to the user.
        try:
            width, height = (int(side) for side in resolution.split("x"))
        except (AttributeError, ValueError):
            return f"❌ Invalid resolution {resolution!r}. Expected WIDTHxHEIGHT, e.g. 512x512."
        if width <= 0 or height <= 0:
            return f"❌ Invalid resolution {resolution!r}. Width and height must be positive."

        if DEVICE == "cuda":
            # Collect garbage first so that memory freed by the collection
            # is actually handed back when the CUDA cache is emptied.
            gc.collect()
            torch.cuda.empty_cache()

        # This is where the actual video generation would be implemented.
        # For now, return a placeholder message. It is assembled line by
        # line so the text shown to the user carries no source indentation.
        summary_lines = [
            "✅ Video generation request processed:",
            "",
            f"📝 Prompt: {prompt}",
            f"⏱️ Duration: {duration} seconds",
            f"📐 Resolution: {resolution}",
            f"🖥️ Device: {DEVICE}",
            "",
            "Note: Actual video generation implementation needed.",
            "The model is loaded and ready for inference.",
        ]
        return "\n".join(summary_lines)

    except Exception as e:
        # UI boundary: report unexpected failures as text instead of raising
        return f"❌ Error during generation: {e}"

def get_system_info():
    """Build a Markdown summary of the runtime environment for debugging.

    Returns:
        A Markdown string listing the Python and PyTorch versions, CUDA
        availability, the GPU memory reported by ``check_gpu_memory()``,
        whether the app runs on Spaces, the selected device, and whether the
        model was loaded.
    """
    # Use the sys module directly; reaching it through ``os.sys`` relies on
    # an undocumented attribute of the os module.
    import sys

    model_status = "✅" if MODEL is not None else "❌"

    # Lines are emitted without leading indentation: in plain Markdown a
    # 4-space indent would turn the whole summary into a code block.
    info_lines = [
        "🖥️ **System Information:**",
        f"- Python: {sys.version.split()[0]}",
        f"- PyTorch: {torch.__version__}",
        f"- CUDA Available: {torch.cuda.is_available()}",
        f"- GPU Memory: {check_gpu_memory():.1f} GB",
        f"- Running on Spaces: {IS_SPACES}",
        f"- Device: {DEVICE}",
        f"- Model Loaded: {model_status}",
    ]
    return "\n".join(info_lines)

# Create Gradio interface: one tab to submit generation requests and one tab
# to display system information for debugging.
with gr.Blocks(title="HunyuanVideo Generator", theme=gr.themes.Soft()) as demo:
    
    gr.Markdown("# 🎬 HunyuanVideo Text-to-Video Generator")
    gr.Markdown("Generate videos from text descriptions using the HunyuanVideo model.")
    
    with gr.Tab("Generate Video"):
        with gr.Row():
            # Left column: request inputs
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    label="📝 Video Description",
                    placeholder="A cat playing with a ball of yarn in a sunny garden...",
                    lines=3,
                    max_lines=5
                )
                
                with gr.Row():
                    # Slider bounds match the 1-10 second range that
                    # generate_video accepts
                    duration_slider = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=5,
                        step=1,
                        label="⏱️ Duration (seconds)"
                    )
                    
                    resolution_dropdown = gr.Dropdown(
                        choices=["256x256", "512x512", "768x768", "1024x1024"],
                        value="512x512",
                        label="📐 Resolution"
                    )
                
                generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")
                
            # Right column: status text returned by generate_video
            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="📋 Output",
                    lines=10,
                    show_copy_button=True
                )
        
        # Event handler
        generate_btn.click(
            fn=generate_video,
            inputs=[prompt_input, duration_slider, resolution_dropdown],
            outputs=output_text
        )
        
        # Example prompts (prompt, duration, resolution) that fill the inputs
        gr.Examples(
            examples=[
                ["A beautiful sunset over a calm ocean with gentle waves", 5, "512x512"],
                ["A cat gracefully jumping between rooftops in a medieval town", 7, "768x768"],
                ["Cherry blossoms falling in a Japanese garden", 4, "512x512"],
                ["A spacecraft flying through a colorful nebula", 8, "1024x1024"]
            ],
            inputs=[prompt_input, duration_slider, resolution_dropdown]
        )
    
    with gr.Tab("System Info"):
        info_button = gr.Button("🔍 Check System Info")
        info_output = gr.Markdown()
        
        # Render the Markdown returned by get_system_info on click
        info_button.click(
            fn=get_system_info,
            outputs=info_output
        )

# Start the web server only when this file is executed as a script
if __name__ == "__main__":
    launch_options = {
        "share": False,  # Hugging Face Spaces handles sharing
        "server_name": "0.0.0.0",  # Listen on all interfaces; important for Spaces
        "server_port": 7860,  # Default port for Spaces
        "show_error": True,
    }
    demo.launch(**launch_options)