import gradio as gr
import os
import torch
import gc
from typing import Optional

# Hugging Face Spaces is detected by the presence of the SPACE_ID environment
# variable; its value is irrelevant, only whether it is set.
IS_SPACES = "SPACE_ID" in os.environ

def check_gpu_memory():
    """Return the total memory of CUDA device 0 in GiB.

    This is the device's total capacity as reported by its properties, not
    the amount currently free. Returns 0 when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        return 0

    total_bytes = torch.cuda.get_device_properties(0).total_memory
    return total_bytes / 1024**3

def load_model():
    """Load the HunyuanVideo model and tokenizer without ever raising.

    Returns:
        A ``(model, tokenizer, device)`` tuple. ``device`` is ``"cuda"`` only
        when CUDA is available and check_gpu_memory() reports more than 8 GB;
        otherwise it is ``"cpu"``. If anything raises while loading, the error
        is printed and ``(None, None, "cpu")`` is returned instead.
    """
    try:
        # On Spaces, log the hardware first so memory failures are easier to diagnose.
        if IS_SPACES:
            print("Running on Hugging Face Spaces")
            print(f"Available GPU memory: {check_gpu_memory():.1f} GB")

        # Imported inside the try so an import failure is reported by the
        # handler below instead of propagating to the caller.
        from transformers import AutoModel, AutoTokenizer

        model_name = "tencent/HunyuanVideo"

        # GPU is used only when present and large enough; everything else runs on CPU.
        use_gpu = torch.cuda.is_available() and check_gpu_memory() > 8
        device = "cuda" if use_gpu else "cpu"
        print(f"Using device: {device}")

        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Half precision with automatic placement on GPU, full precision on CPU.
        if use_gpu:
            dtype, placement = torch.float16, "auto"
        else:
            dtype, placement = torch.float32, None

        model = AutoModel.from_pretrained(
            model_name,
            torch_dtype=dtype,
            device_map=placement,
            low_cpu_mem_usage=True,
        )
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None, "cpu"
    else:
        return model, tokenizer, device

# Initialize model
# Runs once, when this module is first executed. load_model() handles its own
# errors, so after a failed load MODEL and TOKENIZER are None and DEVICE is "cpu".
MODEL, TOKENIZER, DEVICE = load_model()

def generate_video(prompt: str, duration: int = 5, resolution: str = "512x512") -> str:
    """Validate a text-to-video request and return a status message.

    Actual video generation is not implemented yet; on success this returns a
    placeholder summary of the request.

    Args:
        prompt: Text description of the video. Must contain non-whitespace text.
        duration: Requested length in seconds; must be within 1-10 inclusive.
        resolution: Frame size as "WIDTHxHEIGHT" with positive integers.

    Returns:
        The request summary on success, or a message starting with "❌" that
        explains why the request was rejected. Unexpected exceptions are
        reported in the returned string rather than raised.
    """
    if MODEL is None:
        return "❌ Model not loaded. This might be due to memory limitations on Hugging Face Spaces."

    try:
        # Validate the cheap inputs first so a rejected request never touches the GPU.
        if not prompt or not prompt.strip():
            return "❌ Please enter a valid prompt."

        if duration < 1 or duration > 10:
            return "❌ Duration must be between 1-10 seconds."

        # Report a malformed resolution explicitly instead of leaking the raw
        # int()/unpacking error text to the user.
        bad_resolution = f"❌ Invalid resolution {resolution!r}. Expected WIDTHxHEIGHT, e.g. 512x512."
        try:
            width, height = map(int, resolution.split('x'))
        except ValueError:
            return bad_resolution
        if width <= 0 or height <= 0:
            return bad_resolution

        if DEVICE == "cuda":
            # Collect garbage before flushing so memory released by the
            # collector is also returned by empty_cache().
            gc.collect()
            torch.cuda.empty_cache()

        # This is where you would implement the actual video generation
        # For now, return a placeholder message
        return f"""
        ✅ Video generation request processed:
        
        📝 Prompt: {prompt}
        ⏱️ Duration: {duration} seconds
        📐 Resolution: {resolution}
        🖥️ Device: {DEVICE}
        
        Note: Actual video generation implementation needed.
        The model is loaded and ready for inference.
        """

    except Exception as e:
        return f"❌ Error during generation: {str(e)}"

def get_system_info():
    """Build a Markdown summary of the runtime environment for debugging.

    Returns:
        A Markdown-formatted string listing the Python and PyTorch versions,
        CUDA availability, the GPU memory reported by check_gpu_memory(),
        whether the app is running on Spaces, the selected device, and
        whether the model was loaded.
    """
    # Imported locally: `os.sys` is not a documented part of the `os` API and
    # only resolves because `os` happens to import `sys` internally.
    import sys

    python_version = sys.version.split()[0]
    info = f"""
    🖥️ **System Information:**
    - Python: {python_version}
    - PyTorch: {torch.__version__}
    - CUDA Available: {torch.cuda.is_available()}
    - GPU Memory: {check_gpu_memory():.1f} GB
    - Running on Spaces: {IS_SPACES}
    - Device: {DEVICE}
    - Model Loaded: {'✅' if MODEL is not None else '❌'}
    """
    return info

# Create Gradio interface
# Two tabs are built: "Generate Video" (prompt, duration and resolution inputs
# wired to generate_video) and "System Info" (a button wired to get_system_info).
with gr.Blocks(title="HunyuanVideo Generator", theme=gr.themes.Soft()) as demo:
    
    gr.Markdown("# 🎬 HunyuanVideo Text-to-Video Generator")
    gr.Markdown("Generate videos from text descriptions using the HunyuanVideo model.")
    
    with gr.Tab("Generate Video"):
        with gr.Row():
            # Left column: request inputs and the trigger button.
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    label="📝 Video Description",
                    placeholder="A cat playing with a ball of yarn in a sunny garden...",
                    lines=3,
                    max_lines=5
                )
                
                with gr.Row():
                    # Range mirrors the 1-10 second limit enforced in generate_video.
                    duration_slider = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=5,
                        step=1,
                        label="⏱️ Duration (seconds)"
                    )
                    
                    # Every choice is a "WIDTHxHEIGHT" string, the format
                    # generate_video splits on 'x'.
                    resolution_dropdown = gr.Dropdown(
                        choices=["256x256", "512x512", "768x768", "1024x1024"],
                        value="512x512",
                        label="📐 Resolution"
                    )
                
                generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")
                
            # Right column: the status text returned by generate_video.
            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="📋 Output",
                    lines=10,
                    show_copy_button=True
                )
        
        # Event handler
        # Inputs are passed in this order, matching generate_video's
        # (prompt, duration, resolution) parameters.
        generate_btn.click(
            fn=generate_video,
            inputs=[prompt_input, duration_slider, resolution_dropdown],
            outputs=output_text
        )
        
        # Example prompts
        # Each row is [prompt, duration, resolution], in the same order as `inputs`.
        gr.Examples(
            examples=[
                ["A beautiful sunset over a calm ocean with gentle waves", 5, "512x512"],
                ["A cat gracefully jumping between rooftops in a medieval town", 7, "768x768"],
                ["Cherry blossoms falling in a Japanese garden", 4, "512x512"],
                ["A spacecraft flying through a colorful nebula", 8, "1024x1024"]
            ],
            inputs=[prompt_input, duration_slider, resolution_dropdown]
        )
    
    with gr.Tab("System Info"):
        info_button = gr.Button("🔍 Check System Info")
        info_output = gr.Markdown()
        
        # get_system_info takes no inputs; its Markdown string goes to info_output.
        info_button.click(
            fn=get_system_info,
            outputs=info_output
        )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    launch_options = {
        "share": False,  # Hugging Face Spaces handles sharing
        "server_name": "0.0.0.0",  # Important for Spaces
        "server_port": 7860,  # Default port for Spaces
        "show_error": True,
    }
    demo.launch(**launch_options)