Taf2023 committed on
Commit
792b77a
·
verified ·
1 Parent(s): fa80846

Deploy Gradio app with multiple files

Browse files
Files changed (3) hide show
  1. app.py +348 -0
  2. config.py +9 -0
  3. requirements.txt +12 -0
app.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ import torch
4
+ from diffusers import DiffusionPipeline
5
+ import numpy as np
6
+ from PIL import Image
7
+ import tempfile
8
+ import os
9
+ from moviepy.editor import ImageSequenceClip, AudioFileClip
10
+ import soundfile as sf
11
+ from transformers import pipeline
12
+ import time
13
+ from typing import List, Tuple, Optional
14
+ import json
15
+
16
+ from config import Config
17
+ from utils import VideoGenerator, AudioGenerator, ImageGenerator
18
+
# Shared generator instances, built once at import time and reused by the
# request handlers defined below.
image_gen = ImageGenerator()
audio_gen = AudioGenerator()
video_gen = VideoGenerator()
23
+
@spaces.GPU(duration=1500)
def compile_transformer():
    """Ahead-of-time compile the image pipeline's transformer.

    Runs the pipeline once on a throwaway prompt while
    ``spaces.aoti_capture`` records the arguments the transformer is called
    with, exports the transformer with those example inputs through
    ``torch.export.export``, and passes the exported program to
    ``spaces.aoti_compile``.

    Returns:
        Whatever ``spaces.aoti_compile`` produces for the exported program.
    """
    transformer = image_gen.pipe.transformer

    # One warm-up inference so the capture sees real transformer inputs.
    with spaces.aoti_capture(transformer) as call:
        image_gen.pipe("test compilation prompt")

    exported_program = torch.export.export(
        transformer,
        args=call.args,
        kwargs=call.kwargs,
    )
    compiled = spaces.aoti_compile(exported_program)
    return compiled
36
+
# Compile once while the module is being imported, then patch the compiled
# artifact into the live pipeline's transformer.
print("Compiling AI models for optimal performance...")
compiled_transformer = compile_transformer()
spaces.aoti_apply(compiled_transformer, image_gen.pipe.transformer)
print("✅ Models compiled successfully!")
42
+
@spaces.GPU(duration=120)
def generate_video(
    prompt: str,
    duration: int,
    fps: int,
    audio_type: str,
    voice_gender: str,
    music_style: str,
    num_images: int,
    image_size: int,
    motion_strength: float,
    progress=gr.Progress()
) -> str:
    """
    Generate a video from a text prompt with AI-generated images and audio.

    The pipeline runs in four stages: generate ``num_images`` still images,
    generate the requested audio track, expand the stills into motion
    frames, and compose frames and audio into the final video.

    Args:
        prompt: Text description for the video content. Must not be blank.
        duration: Duration of the video in seconds (coerced to ``int``).
        fps: Frames per second for the video (coerced to ``int``).
        audio_type: Type of audio to generate (narration/music/both).
        voice_gender: Gender for voice narration.
        music_style: Style of background music.
        num_images: Number of unique images to generate (coerced to ``int``).
        image_size: Edge length of the square generated images
            (coerced to ``int``).
        motion_strength: Strength of motion between frames.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            how Gradio injects the tracker, so it is intentionally kept.

    Returns:
        Path to the generated video file.

    Raises:
        gr.Error: If the inputs are invalid or any generation stage fails.
    """
    try:
        if not prompt or not prompt.strip():
            raise gr.Error("Please enter a prompt describing the video.")

        # UI sliders and API clients may deliver floats; range() and frame
        # arithmetic below need real integers.
        duration = int(duration)
        fps = int(fps)
        num_images = int(num_images)
        image_size = int(image_size)
        if duration <= 0 or fps <= 0 or num_images <= 0:
            raise gr.Error(
                "Duration, FPS and number of scenes must all be positive."
            )

        progress(0.1, desc="Starting video generation...")

        # Calculate timing. Each image must be held for at least one frame,
        # even when there are more images than total frames.
        total_frames = duration * fps
        frames_per_image = max(1, total_frames // num_images)

        progress(0.2, desc="Generating images...")
        # Generate images
        images = []
        for i in range(num_images):
            # Slightly vary the prompt for each image
            varied_prompt = f"{prompt}, frame {i+1}, cinematic lighting"
            image = image_gen.generate_image(
                prompt=varied_prompt,
                size=(image_size, image_size)
            )
            images.append(image)
            # i + 1 images are finished at this point, so the image stage
            # (0.2 -> 0.5) reaches its end on the last iteration.
            progress(
                0.2 + ((i + 1) / num_images) * 0.3,
                desc=f"Generated image {i+1}/{num_images}"
            )

        progress(0.5, desc="Generating audio...")
        # Generate audio
        audio_path = None
        if audio_type in ["narration", "both"]:
            audio_path = audio_gen.generate_narration(
                text=prompt,
                gender=voice_gender,
                duration=duration
            )

        if audio_type in ["music", "both"]:
            music_path = audio_gen.generate_music(
                style=music_style,
                duration=duration
            )
            if audio_path and audio_type == "both":
                # Mix narration and music
                audio_path = audio_gen.mix_audio(audio_path, music_path)
            elif not audio_path:
                # Music only, or narration produced nothing to mix with.
                audio_path = music_path

        progress(0.7, desc="Creating video frames...")
        # Create video frames with motion
        video_frames = video_gen.create_motion_frames(
            images=images,
            frames_per_image=frames_per_image,
            motion_strength=motion_strength
        )

        progress(0.9, desc="Composing final video...")
        # Create video
        video_path = video_gen.create_video(
            frames=video_frames,
            fps=fps,
            audio_path=audio_path,
            duration=duration
        )

        progress(1.0, desc="Video generation complete!")
        return video_path

    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        # Keep the original exception chained for the server-side traceback.
        raise gr.Error(f"Error generating video: {str(e)}") from e
137
+
@spaces.GPU(duration=60)
def generate_sample_image(prompt: str, style: str) -> Image.Image:
    """Render one 512x512 preview image for *prompt* in the given art *style*."""
    preview_size = (512, 512)
    # Append the style and fixed quality hints to the user's prompt.
    styled_prompt = ", ".join(
        [prompt, f"{style} style", "high quality", "detailed"]
    )
    return image_gen.generate_image(prompt=styled_prompt, size=preview_size)
146
+
def create_demo():
    """Create the Gradio demo interface.

    Builds a two-tab ``gr.Blocks`` app: a "Generate Video" tab wired to
    ``generate_video`` and an "Image Preview" tab wired to
    ``generate_sample_image``, plus a set of example prompts.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from statement order; confirm against the intended design.
    with gr.Blocks(
        title="AI Video Generator",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .header-text {
            text-align: center;
            margin-bottom: 2rem;
        }
        .preview-box {
            border: 2px dashed #ccc;
            border-radius: 10px;
            padding: 20px;
            text-align: center;
        }
        """
    ) as demo:

        gr.HTML("""
        <div class="header-text">
            <h1>🎬 AI Video Generator</h1>
            <p>Create stunning videos from text prompts using AI-powered image and audio generation</p>
            <p><a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">Built with anycoder</a></p>
        </div>
        """)

        with gr.Tabs():
            with gr.TabItem("🎥 Generate Video"):
                with gr.Row():
                    with gr.Column(scale=2):
                        gr.Markdown("### 📝 Video Description")
                        prompt_input = gr.Textbox(
                            label="Enter your video concept",
                            placeholder="A serene landscape with mountains and a lake at sunset...",
                            lines=3,
                            value="A beautiful forest with sunlight filtering through the trees, birds flying, peaceful nature scene"
                        )

                        gr.Markdown("### ⚙️ Video Settings")
                        with gr.Row():
                            duration_slider = gr.Slider(
                                minimum=5,
                                maximum=30,
                                value=10,
                                step=1,
                                label="Duration (seconds)"
                            )
                            fps_slider = gr.Slider(
                                minimum=12,
                                maximum=30,
                                value=24,
                                step=1,
                                label="FPS"
                            )

                        with gr.Row():
                            num_images_slider = gr.Slider(
                                minimum=3,
                                maximum=10,
                                value=5,
                                step=1,
                                label="Number of Scenes"
                            )
                            image_size_slider = gr.Slider(
                                minimum=256,
                                maximum=768,
                                value=512,
                                step=128,
                                label="Image Size"
                            )

                        motion_slider = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.3,
                            step=0.1,
                            label="Motion Strength"
                        )

                    with gr.Column(scale=1):
                        gr.Markdown("### 🎵 Audio Settings")
                        audio_type_radio = gr.Radio(
                            choices=["narration", "music", "both"],
                            value="both",
                            label="Audio Type"
                        )

                        voice_radio = gr.Radio(
                            choices=["male", "female"],
                            value="female",
                            label="Voice Gender"
                        )

                        music_dropdown = gr.Dropdown(
                            choices=["ambient", "cinematic", "upbeat", "peaceful", "dramatic"],
                            value="peaceful",
                            label="Music Style"
                        )

                        generate_btn = gr.Button(
                            "🎬 Generate Video",
                            variant="primary",
                            size="lg"
                        )

                # Both outputs start hidden and are revealed by the first
                # step of the generate button's event chain.
                with gr.Column():
                    video_output = gr.Video(
                        label="Generated Video",
                        visible=False
                    )

                    status_text = gr.Textbox(
                        label="Status",
                        visible=False,
                        interactive=False
                    )

            with gr.TabItem("🖼️ Image Preview"):
                gr.Markdown("### Preview image generation before creating the full video")

                with gr.Row():
                    preview_prompt = gr.Textbox(
                        label="Test Prompt",
                        placeholder="Enter a prompt to test image generation...",
                        value="A majestic dragon flying over a castle"
                    )

                with gr.Row():
                    style_dropdown = gr.Dropdown(
                        choices=["photorealistic", "anime", "oil painting", "watercolor", "3D render"],
                        value="photorealistic",
                        label="Art Style"
                    )
                    preview_btn = gr.Button("Generate Preview", variant="secondary")

                preview_image = gr.Image(
                    label="Image Preview",
                    type="pil",
                    elem_classes=["preview-box"]
                )

        # Positional order must match generate_video's parameters; shared by
        # the examples table and the generate button.
        generation_inputs = [
            prompt_input, duration_slider, fps_slider,
            audio_type_radio, voice_radio, music_dropdown,
            num_images_slider, image_size_slider, motion_slider
        ]

        # Example prompts
        gr.Markdown("### 💡 Example Prompts")
        gr.Examples(
            examples=[
                ["A futuristic city with flying cars and neon lights at night", 15, 24, "both", "female", "cinematic", 5, 512, 0.5],
                ["A peaceful beach with waves crashing and palm trees swaying", 10, 24, "music", "male", "peaceful", 4, 512, 0.3],
                ["A magical forest with glowing mushrooms and fairy lights", 12, 24, "both", "female", "ambient", 6, 512, 0.4],
                ["A bustling marketplace in ancient Rome", 8, 24, "narration", "male", "dramatic", 4, 512, 0.6],
            ],
            inputs=generation_inputs,
            outputs=[video_output],
            fn=generate_video,
        )

        def _announce_start():
            """Reveal the status box and video player and show the start message."""
            return (
                gr.update(
                    value="Starting video generation... This may take a few minutes.",
                    visible=True,
                ),
                gr.update(visible=True),
            )

        # Event handlers.
        # One ordered chain instead of three independent click listeners:
        # separate listeners run concurrently and raced on video_output, and
        # the status box was never made visible.
        generate_btn.click(
            fn=_announce_start,
            outputs=[status_text, video_output]
        ).then(
            fn=generate_video,
            inputs=generation_inputs,
            outputs=[video_output],
            show_progress=True
        ).success(
            # .success() only fires when generate_video did not raise, so a
            # failed run no longer reports completion.
            fn=lambda: "Video generation complete! You can now download your video.",
            outputs=[status_text]
        )

        preview_btn.click(
            fn=generate_sample_image,
            inputs=[preview_prompt, style_dropdown],
            outputs=[preview_image]
        )

    return demo
341
+
if __name__ == "__main__":
    demo = create_demo()
    # `show_tips` is no longer accepted by Blocks.launch() in Gradio 4+, and
    # requirements.txt installs an unpinned (latest) gradio, so passing it
    # raised TypeError before the server could start.
    demo.launch(
        share=True,
        show_error=True
    )
config.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from dataclasses import dataclass
3
+
4
+ @dataclass
5
+ class Config:
6
+ """Configuration settings for the AI Video Generator"""
7
+
8
+ # Model settings
9
+ IMAGE_MODEL = "stabilityai/stable-diff
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
git+https://github.com/huggingface/diffusers
git+https://github.com/huggingface/transformers
sentencepiece
accelerate
torch
tokenizers
gradio
requests
Pillow
# app.py imports from `moviepy.editor`, which MoviePy 2.0 removed; stay on 1.x.
moviepy<2
soundfile
numpy