Spaces:

Taf2023
/

super-bold-space

Runtime error

App Files Files Community

Taf2023 committed on Oct 14

Commit

792b77a

verified ·

1 Parent(s): fa80846

Deploy Gradio app with multiple files

Browse files

Files changed (3) hide show

app.py +348 -0
config.py +9 -0
requirements.txt +12 -0

app.py ADDED Viewed

	@@ -0,0 +1,348 @@

+import gradio as gr
+import spaces
+import torch
+from diffusers import DiffusionPipeline
+import numpy as np
+from PIL import Image
+import tempfile
+import os
+from moviepy.editor import ImageSequenceClip, AudioFileClip
+import soundfile as sf
+from transformers import pipeline
+import time
+from typing import List, Tuple, Optional
+import json
+from config import Config
+from utils import VideoGenerator, AudioGenerator, ImageGenerator
+# Initialize generators
+# Module-level instances created once at import time and shared by every
+# handler below (compile_transformer, generate_video, generate_sample_image).
+# NOTE(review): `utils` is not among the three files added in this commit
+# (app.py, config.py, requirements.txt) — confirm it exists in the repo,
+# otherwise the import above fails before these lines run.
+image_gen = ImageGenerator()
+audio_gen = AudioGenerator()
+video_gen = VideoGenerator()
+@spaces.GPU(duration=1500)
+def compile_transformer():
+    """Compile the Stable Diffusion transformer for faster inference"""
+    with spaces.aoti_capture(image_gen.pipe.transformer) as call:
+        image_gen.pipe("test compilation prompt")
+    exported = torch.export.export(
+        image_gen.pipe.transformer,
+        args=call.args,
+        kwargs=call.kwargs,
+    )
+    return spaces.aoti_compile(exported)
+# Compile during startup
+# These statements run at import time, so the app does not finish loading
+# until compile_transformer() returns (it is decorated with a 1500-second
+# GPU allocation above).
+print("Compiling AI models for optimal performance...")
+compiled_transformer = compile_transformer()
+# Attach the compiled artifact to the live pipeline's transformer.
+# NOTE(review): presumably later image_gen.pipe calls then use the compiled
+# module — confirm against the `spaces` AoT documentation.
+spaces.aoti_apply(compiled_transformer, image_gen.pipe.transformer)
+print("✅ Models compiled successfully!")
+@spaces.GPU(duration=120)
+def generate_video(
+    prompt: str,
+    duration: int,
+    fps: int,
+    audio_type: str,
+    voice_gender: str,
+    music_style: str,
+    num_images: int,
+    image_size: int,
+    motion_strength: float,
+    progress=gr.Progress()
+) -> str:
+    """
+    Generate a video from text prompt with AI-generated images and audio
+    Args:
+        prompt: Text description for the video content
+        duration: Duration of the video in seconds
+        fps: Frames per second for the video
+        audio_type: Type of audio to generate (narration/music/both)
+        voice_gender: Gender for voice narration
+        music_style: Style of background music
+        num_images: Number of unique images to generate
+        image_size: Size of generated images
+        motion_strength: Strength of motion between frames
+    Returns:
+        Path to the generated video file
+    """
+    try:
+        progress(0.1, desc="Starting video generation...")
+        # Calculate timing
+        total_frames = duration * fps
+        frames_per_image = total_frames // num_images
+        progress(0.2, desc="Generating images...")
+        # Generate images
+        images = []
+        for i in range(num_images):
+            # Slightly vary the prompt for each image
+            varied_prompt = f"{prompt}, frame {i+1}, cinematic lighting"
+            image = image_gen.generate_image(
+                prompt=varied_prompt,
+                size=(image_size, image_size)
+            )
+            images.append(image)
+            progress(0.2 + (i / num_images) * 0.3, desc=f"Generated image {i+1}/{num_images}")
+        progress(0.5, desc="Generating audio...")
+        # Generate audio
+        audio_path = None
+        if audio_type in ["narration", "both"]:
+            narration_path = audio_gen.generate_narration(
+                text=prompt,
+                gender=voice_gender,
+                duration=duration
+            )
+            audio_path = narration_path
+        if audio_type in ["music", "both"]:
+            music_path = audio_gen.generate_music(
+                style=music_style,
+                duration=duration
+            )
+            if audio_path and audio_type == "both":
+                # Mix narration and music
+                audio_path = audio_gen.mix_audio(audio_path, music_path)
+            elif not audio_path:
+                audio_path = music_path
+        progress(0.7, desc="Creating video frames...")
+        # Create video frames with motion
+        video_frames = video_gen.create_motion_frames(
+            images=images,
+            frames_per_image=frames_per_image,
+            motion_strength=motion_strength
+        )
+        progress(0.9, desc="Composing final video...")
+        # Create video
+        video_path = video_gen.create_video(
+            frames=video_frames,
+            fps=fps,
+            audio_path=audio_path,
+            duration=duration
+        )
+        progress(1.0, desc="Video generation complete!")
+        return video_path
+    except Exception as e:
+        raise gr.Error(f"Error generating video: {str(e)}")
+@spaces.GPU(duration=60)
+def generate_sample_image(prompt: str, style: str) -> Image.Image:
+    """Generate a sample image for preview"""
+    styled_prompt = f"{prompt}, {style} style, high quality, detailed"
+    return image_gen.generate_image(
+        prompt=styled_prompt,
+        size=(512, 512)
+    )
+def create_demo():
+    """Create the Gradio demo interface"""
+    with gr.Blocks(
+        title="AI Video Generator",
+        theme=gr.themes.Soft(),
+        css="""
+        .gradio-container {
+            max-width: 1200px !important;
+        }
+        .header-text {
+            text-align: center;
+            margin-bottom: 2rem;
+        }
+        .preview-box {
+            border: 2px dashed #ccc;
+            border-radius: 10px;
+            padding: 20px;
+            text-align: center;
+        }
+        """
+    ) as demo:
+        gr.HTML("""
+        <div class="header-text">
+            <h1>🎬 AI Video Generator</h1>
+            <p>Create stunning videos from text prompts using AI-powered image and audio generation</p>
+            <p><a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">Built with anycoder</a></p>
+        </div>
+        """)
+        with gr.Tabs():
+            with gr.TabItem("🎥 Generate Video"):
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        gr.Markdown("### 📝 Video Description")
+                        prompt_input = gr.Textbox(
+                            label="Enter your video concept",
+                            placeholder="A serene landscape with mountains and a lake at sunset...",
+                            lines=3,
+                            value="A beautiful forest with sunlight filtering through the trees, birds flying, peaceful nature scene"
+                        )
+                        gr.Markdown("### ⚙️ Video Settings")
+                        with gr.Row():
+                            duration_slider = gr.Slider(
+                                minimum=5,
+                                maximum=30,
+                                value=10,
+                                step=1,
+                                label="Duration (seconds)"
+                            )
+                            fps_slider = gr.Slider(
+                                minimum=12,
+                                maximum=30,
+                                value=24,
+                                step=1,
+                                label="FPS"
+                            )
+                        with gr.Row():
+                            num_images_slider = gr.Slider(
+                                minimum=3,
+                                maximum=10,
+                                value=5,
+                                step=1,
+                                label="Number of Scenes"
+                            )
+                            image_size_slider = gr.Slider(
+                                minimum=256,
+                                maximum=768,
+                                value=512,
+                                step=128,
+                                label="Image Size"
+                            )
+                        motion_slider = gr.Slider(
+                            minimum=0.1,
+                            maximum=1.0,
+                            value=0.3,
+                            step=0.1,
+                            label="Motion Strength"
+                        )
+                    with gr.Column(scale=1):
+                        gr.Markdown("### 🎵 Audio Settings")
+                        audio_type_radio = gr.Radio(
+                            choices=["narration", "music", "both"],
+                            value="both",
+                            label="Audio Type"
+                        )
+                        voice_radio = gr.Radio(
+                            choices=["male", "female"],
+                            value="female",
+                            label="Voice Gender"
+                        )
+                        music_dropdown = gr.Dropdown(
+                            choices=["ambient", "cinematic", "upbeat", "peaceful", "dramatic"],
+                            value="peaceful",
+                            label="Music Style"
+                        )
+                generate_btn = gr.Button(
+                    "🎬 Generate Video",
+                    variant="primary",
+                    size="lg"
+                )
+                with gr.Column():
+                    video_output = gr.Video(
+                        label="Generated Video",
+                        visible=False
+                    )
+                    status_text = gr.Textbox(
+                        label="Status",
+                        visible=False,
+                        interactive=False
+                    )
+            with gr.TabItem("🖼️ Image Preview"):
+                gr.Markdown("### Preview image generation before creating the full video")
+                with gr.Row():
+                    preview_prompt = gr.Textbox(
+                        label="Test Prompt",
+                        placeholder="Enter a prompt to test image generation...",
+                        value="A majestic dragon flying over a castle"
+                    )
+                with gr.Row():
+                    style_dropdown = gr.Dropdown(
+                        choices=["photorealistic", "anime", "oil painting", "watercolor", "3D render"],
+                        value="photorealistic",
+                        label="Art Style"
+                    )
+                    preview_btn = gr.Button("Generate Preview", variant="secondary")
+                preview_image = gr.Image(
+                    label="Image Preview",
+                    type="pil",
+                    elem_classes=["preview-box"]
+                )
+        # Example prompts
+        gr.Markdown("### 💡 Example Prompts")
+        examples = gr.Examples(
+            examples=[
+                ["A futuristic city with flying cars and neon lights at night", 15, 24, "both", "female", "cinematic", 5, 512, 0.5],
+                ["A peaceful beach with waves crashing and palm trees swaying", 10, 24, "music", "male", "peaceful", 4, 512, 0.3],
+                ["A magical forest with glowing mushrooms and fairy lights", 12, 24, "both", "female", "ambient", 6, 512, 0.4],
+                ["A bustling marketplace in ancient Rome", 8, 24, "narration", "male", "dramatic", 4, 512, 0.6],
+            ],
+            inputs=[prompt_input, duration_slider, fps_slider, audio_type_radio, voice_radio, music_dropdown, num_images_slider, image_size_slider, motion_slider],
+            outputs=[video_output],
+            fn=generate_video,
+        )
+        # Event handlers
+        generate_btn.click(
+            fn=generate_video,
+            inputs=[
+                prompt_input, duration_slider, fps_slider,
+                audio_type_radio, voice_radio, music_dropdown,
+                num_images_slider, image_size_slider, motion_slider
+            ],
+            outputs=[video_output],
+            show_progress=True
+        ).then(
+            fn=lambda: "Video generation complete! You can now download your video.",
+            outputs=[status_text]
+        )
+        preview_btn.click(
+            fn=generate_sample_image,
+            inputs=[preview_prompt, style_dropdown],
+            outputs=[preview_image]
+        )
+        # Show status text when generation starts
+        generate_btn.click(
+            fn=lambda: "Starting video generation... This may take a few minutes.",
+            outputs=[status_text]
+        )
+        # Make video output visible after generation
+        generate_btn.click(
+            fn=lambda: gr.Video(visible=True),
+            outputs=[video_output]
+        )
+    return demo
+if __name__ == "__main__":
+    demo = create_demo()
+    demo.launch(
+        share=True,
+        show_error=True,
+        show_tips=True
+    )

config.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from dataclasses import dataclass
+@dataclass
+class Config:
+    """Configuration settings for the AI Video Generator"""
+    # Model settings
+    IMAGE_MODEL = "stabilityai/stable-diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+git+https://github.com/huggingface/diffusers
+git+https://github.com/huggingface/transformers
+sentencepiece
+accelerate
+torch
+tokenizers
+gradio
+requests
+Pillow
+moviepy
+soundfile
+numpy