drvsbrkcn committed
Commit b12e499 · verified · Parent: 85846f7

Upload 10 files

Files changed (10):
  1. LICENSE +21 -0
  2. README.md +9 -7
  3. app.py +515 -0
  4. config.py +190 -0
  5. llm_script_generator.py +388 -0
  6. promptkit.py +81 -0
  7. requirements.txt +53 -0
  8. sync_manager.py +381 -0
  9. utils_audio.py +292 -0
  10. utils_video.py +336 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 EceMotion Pictures
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,14 +1,16 @@
  ---
  title: EceMotion Pictures
- emoji: 🔥
- colorFrom: pink
- colorTo: yellow
+ emoji: 🎬🎤🤖
+ colorFrom: purple
+ colorTo: pink
  sdk: gradio
- sdk_version: 5.49.1
+ sdk_version: 4.44.0
  app_file: app.py
  pinned: false
  license: mit
- short_description: AI-powered 1980s style commercial generator.
+ short_description: 1980s style commercial with perfect audio-video sync.
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ models:
+ - damo-vilab/text-to-video-ms-1.7b
+ - parler-tts/parler-tts-mini-v1
+ - microsoft/DialoGPT-medium
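
Worth noting: the sdk_version field tells Spaces which Gradio runtime to provision, and the bump to 4.44.0 keeps it in step with the gradio==4.44.0 pin added in requirements.txt below; the two should agree or the app may run against a different Gradio than it was tested on.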
app.py ADDED
@@ -0,0 +1,515 @@
+ """
+ EceMotion Pictures - Production Grade Commercial Generator
+ Advanced text-to-video commercial generator with perfect audio-video sync.
+ """
+
+ import os
+ import tempfile
+ import logging
+ from typing import Optional, Tuple, Dict, Any
+ from pathlib import Path
+ import traceback
+
+ import gradio as gr
+ import numpy as np
+
+ # Import our enhanced modules
+ from config import (
+     MODEL_VIDEO, MODEL_AUDIO, MODEL_LLM, MAX_DURATION, MIN_DURATION,
+     DEFAULT_FPS, VOICE_STYLES, get_device, validate_config, log_config
+ )
+ from sync_manager import create_sync_manager
+ from llm_script_generator import create_script_generator
+ from utils_audio import synth_voice, retro_bed, mix_to_stereo, write_wav
+ from utils_video import synth_t2v, apply_retro_filters, mux_audio
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger(__name__)
+
+ # Initialize components
+ DEVICE = get_device()
+ sync_manager = create_sync_manager()
+ script_generator = create_script_generator()
+
+ # Validate configuration
+ if not validate_config():
+     logger.error("Configuration validation failed")
+     exit(1)
+
+ # Log configuration
+ log_config()
+
+ # Modern CSS for Gradio
+ CSS = """
+ .gradio-container {
+     max-width: 1200px !important;
+     margin: 0 auto;
+ }
+ .app-header {
+     text-align: center;
+     margin-bottom: 2rem;
+ }
+ .app-title {
+     font-size: 2.5rem;
+     font-weight: 700;
+     background: linear-gradient(45deg, #ff6b6b, #4ecdc4);
+     -webkit-background-clip: text;
+     -webkit-text-fill-color: transparent;
+     margin-bottom: 0.5rem;
+ }
+ .app-subtitle {
+     opacity: 0.7;
+     font-size: 1.1rem;
+     color: #666;
+ }
+ .control-section {
+     background: #f8f9fa;
+     border-radius: 12px;
+     padding: 1.5rem;
+     margin-bottom: 1rem;
+ }
+ .output-section {
+     background: #ffffff;
+     border: 2px solid #e9ecef;
+     border-radius: 12px;
+     padding: 1.5rem;
+ }
+ .progress-info {
+     background: #e3f2fd;
+     border-left: 4px solid #2196f3;
+     padding: 1rem;
+     margin: 1rem 0;
+     border-radius: 4px;
+ }
+ .error-info {
+     background: #ffebee;
+     border-left: 4px solid #f44336;
+     padding: 1rem;
+     margin: 1rem 0;
+     border-radius: 4px;
+ }
+ """
+
+ # Example configurations
+ EXAMPLES = [
+     {
+         "brand": "EceMotion Pictures",
+         "structure": "Montage → Close-up → Logo stinger",
+         "script": "Remember when technology was simple?",
+         "voice": "Announcer '80s",
+         "duration": 10
+     },
+     {
+         "brand": "VaporWave Studios",
+         "structure": "Before/After → Feature highlight → CTA",
+         "script": "The future is now, but it looks like the past",
+         "voice": "Mall PA",
+         "duration": 8
+     },
+     {
+         "brand": "Neon Dreams",
+         "structure": "Unboxing → Demo → Deal countdown",
+         "script": "Step into the digital sunset",
+         "voice": "Late Night",
+         "duration": 12
+     }
+ ]
+
+ def create_interface():
+     """Create the modern Gradio interface."""
+
+     with gr.Blocks(
+         css=CSS,
+         title="EceMotion Pictures",
+         theme=gr.themes.Soft()
+     ) as demo:
+
+         # Header
+         with gr.Row():
+             gr.HTML("""
+             <div class="app-header">
+                 <div class="app-title">🎬 EceMotion Pictures</div>
+                 <div class="app-subtitle">AI-Powered 1980s Style Commercial Generator</div>
+             </div>
+             """)
+
+         # Main interface
+         with gr.Row():
+             # Left column - Controls
+             with gr.Column(scale=1):
+                 with gr.Group():
+                     gr.Markdown("### 🎯 Commercial Setup")
+
+                     brand = gr.Textbox(
+                         label="Brand Name",
+                         placeholder="YourBrand™",
+                         value="EceMotion Pictures",
+                         info="Enter your brand name"
+                     )
+
+                     structure = gr.Textbox(
+                         label="Commercial Structure",
+                         placeholder="e.g., Montage → Close-up → Logo stinger",
+                         value="Montage → Close-up → Logo stinger",
+                         info="Define the flow of your commercial"
+                     )
+
+                     with gr.Row():
+                         script_prompt = gr.Textbox(
+                             label="Script Hook",
+                             placeholder="Opening hook or idea",
+                             value="Remember when technology was simple?",
+                             scale=3
+                         )
+                         roll_btn = gr.Button("🎲", scale=1, size="sm")
+
+                     duration = gr.Slider(
+                         minimum=MIN_DURATION,
+                         maximum=MAX_DURATION,
+                         value=10,
+                         step=1,
+                         label="Duration (seconds)",
+                         info=f"Between {MIN_DURATION}-{MAX_DURATION} seconds"
+                     )
+
+                 with gr.Group():
+                     gr.Markdown("### 🎤 Audio Settings")
+
+                     voice = gr.Dropdown(
+                         choices=list(VOICE_STYLES.keys()),
+                         value="Announcer '80s",
+                         label="Voice Style",
+                         info="Choose the announcer style"
+                     )
+
+                     music = gr.Checkbox(
+                         value=True,
+                         label="Background Music",
+                         info="Add retro synth jingle"
+                     )
+
+                 with gr.Group():
+                     gr.Markdown("### ⚙️ Advanced Settings")
+
+                     with gr.Accordion("Model & Quality", open=False):
+                         model_video = gr.Dropdown(
+                             choices=["damo-vilab/text-to-video-ms-1.7b", "THUDM/CogVideoX-5b"],
+                             value=MODEL_VIDEO,
+                             label="Video Model",
+                             info="Choose the text-to-video model"
+                         )
+
+                         model_audio = gr.Dropdown(
+                             choices=["parler-tts/parler-tts-mini-v1", "SWivid/F5-TTS"],
+                             value=MODEL_AUDIO,
+                             label="Audio Model",
+                             info="Choose the text-to-speech model"
+                         )
+
+                     with gr.Accordion("Retro Effects", open=False):
+                         vhs_intensity = gr.Slider(
+                             minimum=0.0,
+                             maximum=1.0,
+                             value=0.5,
+                             step=0.1,
+                             label="VHS Effect Intensity"
+                         )
+
+                         seed = gr.Number(
+                             value=42,
+                             precision=0,
+                             label="Random Seed",
+                             info="For reproducible results"
+                         )
+
+                 # Generate button
+                 generate_btn = gr.Button(
+                     "🎬 Generate Commercial",
+                     variant="primary",
+                     size="lg"
+                 )
+
+             # Right column - Output
+             with gr.Column(scale=1):
+                 with gr.Group():
+                     gr.Markdown("### 📺 Generated Commercial")
+
+                     # Progress tracking
+                     progress_info = gr.HTML("""
+                     <div class="progress-info">
+                         <strong>Ready to generate!</strong><br>
+                         Click the generate button to create your retro commercial.
+                     </div>
+                     """)
+
+                     # Video output
+                     output_video = gr.Video(
+                         height=400,
+                         label="Commercial Preview",
+                         show_download_button=True
+                     )
+
+                     # Script output
+                     output_script = gr.Textbox(
+                         label="Generated Script",
+                         lines=8,
+                         max_lines=12,
+                         show_copy_button=True
+                     )
+
+                     # Download section
+                     with gr.Row():
+                         download_btn = gr.DownloadButton(
+                             "📥 Download Commercial",
+                             variant="secondary"
+                         )
+
+         # Examples section
+         with gr.Row():
+             with gr.Column():
+                 gr.Markdown("### 💡 Example Configurations")
+                 # Flatten the EXAMPLES dicts into the value lists gr.Examples expects
+                 examples = gr.Examples(
+                     examples=[[e["brand"], e["structure"], e["script"], e["voice"], e["duration"]] for e in EXAMPLES],
+                     inputs=[brand, structure, script_prompt, voice, duration],
+                     label="Click to load example"
+                 )
+
+         # Footer
+         gr.Markdown("""
+         <div style='text-align: center; opacity: 0.7; font-size: 0.9rem; margin-top: 2rem;'>
+             <p>🎬 Powered by EceMotion Pictures • Perfect audio-video sync • Professional quality</p>
+             <p>Models: Text-to-Video • Text-to-Speech • Enhanced VHS effects</p>
+         </div>
+         """)
+
+         # Event handlers
+         def roll_script_suggestion(structure_text: str, seed_val: int) -> str:
+             """Generate script suggestions using LLM."""
+             try:
+                 suggestions = script_generator.suggest_scripts(structure_text, n=1, seed=seed_val)
+                 return suggestions[0] if suggestions else "Back to '87 - the future is now!"
+             except Exception as e:
+                 logger.error(f"Script suggestion failed: {e}")
+                 return "Back to '87 - the future is now!"
+
+         def generate_commercial(
+             brand_name: str,
+             structure_text: str,
+             script_text: str,
+             duration_val: int,
+             voice_style: str,
+             music_enabled: bool,
+             video_model: str,
+             audio_model: str,
+             vhs_intensity: float,
+             seed_val: int
+         ):
+             """
+             Generate a complete retro commercial with perfect sync.
+             Generator: yields (progress_html, video, script, download) tuples to stream UI updates.
+             """
+             try:
+                 # Update progress
+                 progress_html = """
+                 <div class="progress-info">
+                     <strong>🎬 Generating Commercial...</strong><br>
+                     <div style="margin-top: 0.5rem;">
+                         <div>📝 Generating script with AI...</div>
+                     </div>
+                 </div>
+                 """
+                 yield progress_html, None, None, None
+
+                 # Generate script using LLM
+                 generated_script = script_generator.generate_script(
+                     brand=brand_name or "Brand",
+                     structure=structure_text or "Montage → Close-up → Logo",
+                     script_prompt=script_text or "Back to '87",
+                     duration=duration_val,
+                     voice_style=voice_style,
+                     seed=seed_val
+                 )
+
+                 # Update progress
+                 progress_html = """
+                 <div class="progress-info">
+                     <strong>🎬 Generating Commercial...</strong><br>
+                     <div style="margin-top: 0.5rem;">
+                         <div>✅ Script generated</div>
+                         <div>🎥 Generating video...</div>
+                     </div>
+                 </div>
+                 """
+                 yield progress_html, None, None, None
+
+                 # Create temporary directory
+                 with tempfile.TemporaryDirectory() as tmpdir:
+                     # Generate video
+                     video_prompt = f"{structure_text}. {script_text}. 1980s commercial, VHS texture, soft lighting, bold retro titles, 4:3, brand {brand_name}"
+
+                     # Calculate optimal frame count
+                     num_frames = sync_manager.get_optimal_frame_count(duration_val, DEFAULT_FPS)
+
+                     clip = synth_t2v(
+                         prompt=video_prompt,
+                         seed=seed_val,
+                         num_frames=num_frames,
+                         fps=DEFAULT_FPS,
+                         device=DEVICE,
+                         model_name=video_model
+                     )
+
+                     # Save raw video
+                     raw_video_path = os.path.join(tmpdir, "raw.mp4")
+                     clip.write_videofile(
+                         raw_video_path,
+                         fps=DEFAULT_FPS,
+                         codec='libx264',
+                         audio=False,
+                         verbose=False,
+                         logger=None
+                     )
+
+                     # Apply retro filters
+                     retro_video_path = os.path.join(tmpdir, "retro.mp4")
+                     apply_retro_filters(raw_video_path, retro_video_path, intensity=vhs_intensity)
+
+                     # Update progress
+                     progress_html = """
+                     <div class="progress-info">
+                         <strong>🎬 Generating Commercial...</strong><br>
+                         <div style="margin-top: 0.5rem;">
+                             <div>✅ Script generated</div>
+                             <div>✅ Video generated</div>
+                             <div>🎤 Generating audio...</div>
+                         </div>
+                     </div>
+                     """
+                     yield progress_html, None, None, None
+
+                     # Generate audio
+                     voiceover_text = " ".join([seg.text for seg in generated_script.segments])
+                     sr_voice, wav_voice = synth_voice(
+                         text=voiceover_text,
+                         voice_prompt=voice_style,
+                         model_name=audio_model,
+                         device=DEVICE
+                     )
+
+                     # Add background music if requested
+                     if music_enabled:
+                         sr_music, wav_music = retro_bed(clip.duration)
+                         sr_final, stereo_audio = mix_to_stereo(
+                             sr_voice, wav_voice, sr_music, wav_music, bed_gain=0.3
+                         )
+                     else:
+                         sr_final = sr_voice
+                         stereo_audio = np.stack([wav_voice, wav_voice], axis=1)
+
+                     # Save audio
+                     audio_path = os.path.join(tmpdir, "audio.wav")
+                     write_wav(audio_path, sr_final, stereo_audio)
+
+                     # Update progress
+                     progress_html = """
+                     <div class="progress-info">
+                         <strong>🎬 Generating Commercial...</strong><br>
+                         <div style="margin-top: 0.5rem;">
+                             <div>✅ Script generated</div>
+                             <div>✅ Video generated</div>
+                             <div>✅ Audio generated</div>
+                             <div>🔄 Synchronizing audio and video...</div>
+                         </div>
+                     </div>
+                     """
+                     yield progress_html, None, None, None
+
+                     # Synchronize audio and video
+                     final_video_path = os.path.join(tmpdir, f"{brand_name}_commercial.mp4")
+                     sync_manager.synchronize_media(
+                         video_path=retro_video_path,
+                         audio_path=audio_path,
+                         output_path=final_video_path,
+                         prefer_audio_duration=True
+                     )
+
+                     # Validate sync (the muxed file is probed for both streams)
+                     is_synced, sync_diff = sync_manager.validate_sync(final_video_path, final_video_path)
+
+                     # Format script output
+                     script_lines = []
+                     for i, segment in enumerate(generated_script.segments, 1):
+                         script_lines.append(f"{i}. {segment.timing_marker} {segment.text}")
+
+                     script_output = "\n".join(script_lines) + f"\n\nTAGLINE: {generated_script.tagline}"
+
+                     # Final progress
+                     sync_status = "✅ Perfect sync" if is_synced else f"⚠️ Sync diff: {sync_diff:.3f}s"
+                     progress_html = f"""
+                     <div class="progress-info">
+                         <strong>🎉 Commercial Complete!</strong><br>
+                         <div style="margin-top: 0.5rem;">
+                             <div>✅ Script generated ({generated_script.word_count} words)</div>
+                             <div>✅ Video generated ({num_frames} frames)</div>
+                             <div>✅ Audio generated ({len(stereo_audio)/sr_final:.1f}s)</div>
+                             <div>{sync_status}</div>
+                         </div>
+                     </div>
+                     """
+
+                     yield progress_html, final_video_path, script_output, final_video_path
+
+             except Exception as e:
+                 logger.error(f"Commercial generation failed: {e}")
+                 logger.error(f"Traceback: {traceback.format_exc()}")
+                 error_html = f"""
+                 <div class="error-info">
+                     <strong>❌ Generation Failed</strong><br>
+                     <div style="margin-top: 0.5rem; color: #666;">
+                         Error: {str(e)}<br>
+                         Please try again with different parameters or check the logs.
+                     </div>
+                 </div>
+                 """
+                 yield error_html, None, None, None
+
+         # Connect event handlers
+         roll_btn.click(
+             roll_script_suggestion,
+             inputs=[structure, seed],
+             outputs=[script_prompt]
+         )
+
+         generate_btn.click(
+             generate_commercial,
+             inputs=[
+                 brand, structure, script_prompt, duration, voice, music,
+                 model_video, model_audio, vhs_intensity, seed
+             ],
+             outputs=[progress_info, output_video, output_script, download_btn]
+         )
+
+     return demo
+
+ def main():
+     """Main application entry point."""
+     logger.info("Starting EceMotion Pictures")
+     logger.info(f"Using device: {DEVICE}")
+     logger.info(f"Video model: {MODEL_VIDEO}")
+     logger.info(f"Audio model: {MODEL_AUDIO}")
+     logger.info(f"LLM model: {MODEL_LLM}")
+
+     # Create and launch interface
+     demo = create_interface()
+     demo.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=False,
+         show_error=True
+     )
+
+ if __name__ == "__main__":
+     main()
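
The generate_commercial handler above relies on a real Gradio feature: a generator event handler is treated as a stream, and each yield updates the declared outputs in order. A minimal self-contained sketch of that pattern (a hypothetical standalone demo, not part of this commit):

import time
import gradio as gr

def long_task(steps):
    """Generator handler: each yield refreshes (progress, result) in the UI."""
    for i in range(int(steps)):
        time.sleep(0.5)  # stand-in for a generation stage
        yield f"<b>Step {i + 1}/{int(steps)} done</b>", None  # progress only
    yield "<b>Complete!</b>", "final result"  # last yield fills the result

with gr.Blocks() as demo:
    steps = gr.Slider(1, 10, value=3, step=1, label="Steps")
    progress = gr.HTML()
    result = gr.Textbox(label="Result")
    gr.Button("Run").click(long_task, inputs=[steps], outputs=[progress, result])

if __name__ == "__main__":
    demo.launch()

This is the same wiring generate_btn.click uses, just with four outputs instead of two.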
config.py ADDED
@@ -0,0 +1,190 @@
+ """
+ Configuration management for EceMotion Pictures.
+ Centralized settings for models, parameters, and deployment.
+ """
+
+ import os
+ from typing import Dict, Any, Optional
+
+ # Model Configuration - with fallbacks for HuggingFace Spaces
+ MODEL_VIDEO = os.getenv("MODEL_VIDEO", "damo-vilab/text-to-video-ms-1.7b")  # Start with lighter model
+ MODEL_AUDIO = os.getenv("MODEL_AUDIO", "parler-tts/parler-tts-mini-v1")  # Start with working model
+ MODEL_LLM = os.getenv("MODEL_LLM", "microsoft/DialoGPT-medium")  # Start with lighter LLM
+
+ # Video Configuration
+ MAX_DURATION = int(os.getenv("MAX_DURATION", "15"))
+ MIN_DURATION = int(os.getenv("MIN_DURATION", "5"))
+ DEFAULT_FPS = int(os.getenv("DEFAULT_FPS", "8"))
+ DEFAULT_FRAMES = int(os.getenv("DEFAULT_FRAMES", "64"))  # 8 seconds at 8fps
+
+ # Audio Configuration
+ AUDIO_SAMPLE_RATE = int(os.getenv("AUDIO_SAMPLE_RATE", "22050"))  # Standard rate
+ AUDIO_BITRATE = os.getenv("AUDIO_BITRATE", "128k")  # Lower bitrate for stability
+ MUSIC_GAIN = float(os.getenv("MUSIC_GAIN", "0.3"))
+
+ # GPU Configuration
+ GPU_MEMORY_THRESHOLD = float(os.getenv("GPU_MEMORY_THRESHOLD", "0.8"))
+ USE_QUANTIZATION = os.getenv("USE_QUANTIZATION", "true").lower() == "true"
+ QUANTIZATION_BITS = int(os.getenv("QUANTIZATION_BITS", "8"))
+
+ # Sync Configuration
+ SYNC_TOLERANCE_MS = int(os.getenv("SYNC_TOLERANCE_MS", "200"))  # More lenient for stability
+ FORCE_SYNC = os.getenv("FORCE_SYNC", "false").lower() == "true"  # Disabled by default
+
+ # Retro Filter Configuration
+ VHS_INTENSITY = float(os.getenv("VHS_INTENSITY", "0.5"))
+ SCANLINE_OPACITY = float(os.getenv("SCANLINE_OPACITY", "0.2"))
+ CHROMATIC_ABERRATION = float(os.getenv("CHROMATIC_ABERRATION", "0.05"))
+ FILM_GRAIN = float(os.getenv("FILM_GRAIN", "0.1"))
+
+ # UI Configuration
+ UI_THEME = os.getenv("UI_THEME", "default")
+ SHOW_PROGRESS = os.getenv("SHOW_PROGRESS", "true").lower() == "true"
+ ENABLE_EXAMPLES = os.getenv("ENABLE_EXAMPLES", "true").lower() == "true"
+
+ # Logging Configuration
+ LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
+ LOG_FORMAT = os.getenv("LOG_FORMAT", "text")  # text format for HuggingFace Spaces
+
+ # Model-specific configurations with conservative settings
+ MODEL_CONFIGS: Dict[str, Dict[str, Any]] = {
+     "damo-vilab/text-to-video-ms-1.7b": {
+         "max_frames": 64,
+         "min_frames": 8,
+         "default_frames": 32,
+         "memory_usage_gb": 6,
+         "supports_quantization": False,
+         "stable": True,
+     },
+     "THUDM/CogVideoX-5b": {
+         "max_frames": 48,  # Reduced for stability
+         "min_frames": 16,
+         "default_frames": 32,
+         "memory_usage_gb": 16,  # Conservative estimate
+         "supports_quantization": True,
+         "stable": False,  # Mark as experimental
+     },
+     "parler-tts/parler-tts-mini-v1": {
+         "max_text_length": 500,
+         "min_text_length": 10,
+         "default_voice": "Announcer '80s",
+         "memory_usage_gb": 2,
+         "stable": True,
+     },
+     "SWivid/F5-TTS": {
+         "max_text_length": 300,
+         "min_text_length": 10,
+         "default_voice": "announcer",
+         "memory_usage_gb": 4,
+         "stable": False,  # Mark as experimental
+     },
+     "microsoft/DialoGPT-medium": {
+         "max_tokens": 512,
+         "temperature": 0.7,
+         "top_p": 0.9,
+         "memory_usage_gb": 2,
+         "stable": True,
+     },
+     "Qwen/Qwen2.5-7B-Instruct": {
+         "max_tokens": 1024,
+         "temperature": 0.7,
+         "top_p": 0.9,
+         "memory_usage_gb": 8,
+         "stable": False,  # Mark as experimental
+     },
+ }
+
+ # Voice styles for TTS
+ VOICE_STYLES = {
+     "Announcer '80s": "A confident, upbeat 1980s TV announcer with warm AM-radio tone.",
+     "Mall PA": "Casual, slightly echoey mall public-address vibe.",
+     "Late Night": "Low energy, sly late-night infomercial style.",
+     "News Anchor": "Professional, authoritative news anchor delivery.",
+     "Infomercial": "Enthusiastic, persuasive infomercial host style.",
+     "Radio DJ": "Smooth, charismatic radio disc jockey voice.",
+ }
+
+ # Structure templates for script generation
+ STRUCTURE_TEMPLATES = [
+     "Montage → Close-up → Logo stinger",
+     "Before/After → Feature highlight → CTA",
+     "Testimonial → B-roll → Price tag reveal",
+     "Unboxing → Demo → Deal countdown",
+     "Retro news bulletin → Product shot → Tagline",
+     "Opening hook → Problem/Solution → Call to action",
+     "Brand story → Product showcase → Final tagline",
+ ]
+
+ # Taglines for commercial endings
+ TAGLINES = [
+     "So retro, it's the future.",
+     "Pixels you can trust.",
+     "VHS vibes. Modern results.",
+     "Old-school cool. New-school sales.",
+     "Where nostalgia meets innovation.",
+     "Rewind to the future.",
+     "Classic style. Modern performance.",
+     "The past perfected.",
+     "EceMotion Pictures - Bringing the '80s back to life.",
+     "Your story, our vision, timeless memories.",
+ ]
+
+ def get_model_config(model_name: str) -> Dict[str, Any]:
+     """Get configuration for a specific model."""
+     return MODEL_CONFIGS.get(model_name, {
+         "max_frames": 32,
+         "min_frames": 8,
+         "default_frames": 16,
+         "memory_usage_gb": 4,
+         "supports_quantization": False,
+         "stable": True,
+     })
+
+ def get_device() -> str:
+     """Determine the best available device."""
+     try:
+         import torch
+         if torch.cuda.is_available() and os.getenv("CUDA_VISIBLE_DEVICES", None) not in (None, ""):
+             return "cuda"
+     except ImportError:
+         pass
+     return "cpu"
+
+ def validate_config() -> bool:
+     """Validate configuration settings."""
+     try:
+         assert MIN_DURATION < MAX_DURATION, "MIN_DURATION must be less than MAX_DURATION"
+         assert DEFAULT_FPS > 0, "DEFAULT_FPS must be positive"
+         assert AUDIO_SAMPLE_RATE > 0, "AUDIO_SAMPLE_RATE must be positive"
+         assert 0 <= VHS_INTENSITY <= 1, "VHS_INTENSITY must be between 0 and 1"
+         assert 0 <= SCANLINE_OPACITY <= 1, "SCANLINE_OPACITY must be between 0 and 1"
+         return True
+     except AssertionError as e:
+         print(f"Configuration validation failed: {e}")
+         return False
+
+ def get_safe_model_name(model_name: str, model_type: str) -> str:
+     """Get a safe model name, falling back to stable models."""
+     config = get_model_config(model_name)
+
+     # If the model is not marked stable, fall back to a stable alternative
+     if not config.get("stable", False):
+         if model_type == "video":
+             return "damo-vilab/text-to-video-ms-1.7b"
+         elif model_type == "audio":
+             return "parler-tts/parler-tts-mini-v1"
+         elif model_type == "llm":
+             return "microsoft/DialoGPT-medium"
+
+     return model_name
+
+ def log_config():
+     """Log current configuration for debugging."""
+     print("EceMotion Pictures Configuration:")
+     print(f"  Video Model: {MODEL_VIDEO}")
+     print(f"  Audio Model: {MODEL_AUDIO}")
+     print(f"  LLM Model: {MODEL_LLM}")
+     print(f"  Device: {get_device()}")
+     print(f"  Duration Range: {MIN_DURATION}-{MAX_DURATION}s")
+     print(f"  FPS: {DEFAULT_FPS}")
+     print(f"  Sync Tolerance: {SYNC_TOLERANCE_MS}ms")
llm_script_generator.py ADDED
@@ -0,0 +1,388 @@
+ """
+ LLM-powered script generation for EceMotion Pictures.
+ Generates intelligent, structure-aware commercial scripts with timing markers.
+ """
+
+ import logging
+ import random
+ import re
+ from typing import Dict, List, Optional, Tuple
+ from dataclasses import dataclass
+
+ from config import (
+     MODEL_LLM, MODEL_CONFIGS, VOICE_STYLES, STRUCTURE_TEMPLATES, TAGLINES,
+     get_safe_model_name
+ )
+
+ logger = logging.getLogger(__name__)
+
+ @dataclass
+ class ScriptSegment:
+     """Represents a segment of the commercial script with timing information."""
+     text: str
+     duration_estimate: float
+     segment_type: str  # "hook", "flow", "benefit", "cta"
+     timing_marker: Optional[str] = None
+
+ @dataclass
+ class GeneratedScript:
+     """Complete generated script with all segments and metadata."""
+     segments: List[ScriptSegment]
+     total_duration: float
+     tagline: str
+     voice_style: str
+     word_count: int
+     raw_script: str
+
+ class LLMScriptGenerator:
+     """Generates commercial scripts using large language models with fallbacks."""
+
+     def __init__(self, model_name: str = MODEL_LLM):
+         self.model_name = get_safe_model_name(model_name, "llm")
+         self.model = None
+         self.tokenizer = None
+         self.model_config = MODEL_CONFIGS.get(self.model_name, {})
+         self.llm_available = False
+
+         # Try to initialize LLM
+         self._try_init_llm()
+
+     def _try_init_llm(self):
+         """Try to initialize the LLM model."""
+         try:
+             if "dialo" in self.model_name.lower():
+                 self._init_dialogpt()
+             elif "qwen" in self.model_name.lower():
+                 self._init_qwen()
+             else:
+                 logger.warning(f"Unknown LLM model: {self.model_name}, using fallback")
+                 self.llm_available = False
+         except Exception as e:
+             logger.warning(f"Failed to initialize LLM {self.model_name}: {e}")
+             self.llm_available = False
+
+     def _init_dialogpt(self):
+         """Initialize DialoGPT model."""
+         try:
+             from transformers import AutoTokenizer, AutoModelForCausalLM
+
+             self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+             if self.tokenizer.pad_token is None:
+                 self.tokenizer.pad_token = self.tokenizer.eos_token
+
+             self.model = AutoModelForCausalLM.from_pretrained(
+                 self.model_name,
+                 torch_dtype="auto",
+                 device_map="auto" if self._has_gpu() else "cpu"
+             )
+             self.llm_available = True
+             logger.info(f"DialoGPT model {self.model_name} loaded successfully")
+
+         except Exception as e:
+             logger.error(f"Failed to load DialoGPT: {e}")
+             self.llm_available = False
+
+     def _init_qwen(self):
+         """Initialize Qwen model."""
+         try:
+             from transformers import AutoTokenizer, AutoModelForCausalLM
+
+             self.tokenizer = AutoTokenizer.from_pretrained(
+                 self.model_name,
+                 trust_remote_code=True
+             )
+
+             if self.tokenizer.pad_token is None:
+                 self.tokenizer.pad_token = self.tokenizer.eos_token
+
+             self.model = AutoModelForCausalLM.from_pretrained(
+                 self.model_name,
+                 torch_dtype="auto",
+                 device_map="auto" if self._has_gpu() else "cpu",
+                 trust_remote_code=True
+             )
+             self.llm_available = True
+             logger.info(f"Qwen model {self.model_name} loaded successfully")
+
+         except Exception as e:
+             logger.error(f"Failed to load Qwen: {e}")
+             self.llm_available = False
+
+     def _has_gpu(self) -> bool:
+         """Check if GPU is available."""
+         try:
+             import torch
+             return torch.cuda.is_available()
+         except ImportError:
+             return False
+
+     def _create_system_prompt(self) -> str:
+         """Create system prompt for retro commercial script generation."""
+         return """You are a professional copywriter specializing in 1980s-style TV commercials.
+ Your task is to create engaging, persuasive commercial scripts that capture the authentic retro aesthetic.
+
+ Key requirements:
+ - Use 1980s commercial language and style
+ - Include clear hooks, benefits, and calls-to-action
+ - Keep scripts concise and punchy
+ - Use active voice and emotional appeals
+ - End with a memorable tagline
+
+ Format your response as:
+ HOOK: [Opening attention-grabber]
+ FLOW: [Main content following the structure]
+ BENEFIT: [Key value proposition]
+ CTA: [Call to action with tagline]
+
+ Keep each segment under 2-3 sentences. Use enthusiastic, confident language typical of 1980s advertising."""
+
+     def _create_user_prompt(self, brand: str, structure: str, script_prompt: str,
+                             duration: int, voice_style: str) -> str:
+         """Create user prompt with specific requirements."""
+         return f"""Create a {duration}-second retro commercial script for {brand}.
+
+ Structure: {structure}
+ Script idea: {script_prompt}
+ Voice style: {voice_style}
+
+ Make it authentic to 1980s TV commercials with the energy and style of that era."""
+
+     def _parse_script_response(self, response: str) -> List[ScriptSegment]:
+         """Parse LLM response into structured script segments."""
+         segments = []
+
+         # Split by segment markers
+         parts = re.split(r'(HOOK:|FLOW:|BENEFIT:|CTA:)', response)
+
+         for i in range(1, len(parts), 2):
+             if i + 1 < len(parts):
+                 segment_type = parts[i].rstrip(':').lower()
+                 text = parts[i + 1].strip()
+
+                 if text:
+                     # Estimate duration based on word count (150 WPM)
+                     word_count = len(text.split())
+                     duration = (word_count / 150) * 60  # Convert to seconds
+
+                     segments.append(ScriptSegment(
+                         text=text,
+                         duration_estimate=duration,
+                         segment_type=segment_type,
+                         timing_marker=f"[{segment_type.upper()}]"
+                     ))
+
+         return segments
+
+     def _extract_tagline(self, response: str) -> str:
+         """Extract tagline from the script response."""
+         # Look for tagline in CTA section
+         cta_match = re.search(r'CTA:.*?([A-Z][^.!?]*[.!?])', response, re.DOTALL)
+         if cta_match:
+             cta_text = cta_match.group(1)
+             # Extract the last sentence as potential tagline
+             sentences = re.split(r'[.!?]+', cta_text)
+             if sentences:
+                 tagline = sentences[-1].strip()
+                 if len(tagline) > 5:  # Ensure it's substantial
+                     return tagline
+
+         # Fallback to predefined taglines
+         return random.choice(TAGLINES)
+
+     def generate_script_with_llm(self, brand: str, structure: str, script_prompt: str,
+                                  duration: int, voice_style: str, seed: int = 42) -> GeneratedScript:
+         """Generate script using LLM."""
+         if not self.llm_available:
+             raise RuntimeError("LLM not available")
+
+         # Set random seed for reproducibility
+         random.seed(seed)
+
+         # Create prompts
+         system_prompt = self._create_system_prompt()
+         user_prompt = self._create_user_prompt(brand, structure, script_prompt, duration, voice_style)
+
+         # Format for the model
+         if "dialo" in self.model_name.lower():
+             # DialoGPT format
+             text = f"{user_prompt}\n\nResponse:"
+         else:
+             # Generic format
+             text = f"System: {system_prompt}\n\nUser: {user_prompt}\n\nAssistant:"
+
+         # Tokenize
+         inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
+
+         # Generate (model.eval() returns the model, not a context manager,
+         # so inference is wrapped in torch.no_grad() instead)
+         import torch
+         self.model.eval()
+         with torch.no_grad():
+             outputs = self.model.generate(
+                 **inputs,
+                 max_new_tokens=self.model_config.get("max_tokens", 256),
+                 temperature=self.model_config.get("temperature", 0.7),
+                 top_p=self.model_config.get("top_p", 0.9),
+                 do_sample=True,
+                 pad_token_id=self.tokenizer.eos_token_id,
+                 eos_token_id=self.tokenizer.eos_token_id,
+                 num_return_sequences=1
+             )
+
+         # Decode response
+         response = self.tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
+
+         logger.info(f"Generated script response: {response[:200]}...")
+
+         # Parse response
+         segments = self._parse_script_response(response)
+         tagline = self._extract_tagline(response)
+
+         # Calculate total duration
+         total_duration = sum(segment.duration_estimate for segment in segments)
+
+         # Calculate word count
+         word_count = sum(len(segment.text.split()) for segment in segments)
+
+         return GeneratedScript(
+             segments=segments,
+             total_duration=total_duration,
+             tagline=tagline,
+             voice_style=voice_style,
+             word_count=word_count,
+             raw_script=response
+         )
+
+     def generate_script_with_template(self, brand: str, structure: str, script_prompt: str,
+                                       duration: int, voice_style: str, seed: int = 42) -> GeneratedScript:
+         """Generate script using template-based approach (fallback)."""
+         random.seed(seed)
+
+         # Select structure template
+         structure_template = structure.strip() or random.choice(STRUCTURE_TEMPLATES)
+
+         # Generate segments based on template
+         segments = []
+
+         # Hook
+         hook_text = script_prompt or f"Introducing {brand} - the future is here!"
+         segments.append(ScriptSegment(
+             text=hook_text,
+             duration_estimate=2.0,
+             segment_type="hook",
+             timing_marker="[HOOK]"
+         ))
+
+         # Flow (based on structure)
+         flow_text = f"With {structure_template.lower()}, {brand} delivers results like never before."
+         segments.append(ScriptSegment(
+             text=flow_text,
+             duration_estimate=3.0,
+             segment_type="flow",
+             timing_marker="[FLOW]"
+         ))
+
+         # Benefit
+         benefit_text = "Faster, simpler, cooler - just like your favorite retro tech."
+         segments.append(ScriptSegment(
+             text=benefit_text,
+             duration_estimate=2.5,
+             segment_type="benefit",
+             timing_marker="[BENEFIT]"
+         ))
+
+         # CTA
+         tagline = random.choice(TAGLINES)
+         cta_text = f"Try {brand} today. {tagline}"
+         segments.append(ScriptSegment(
+             text=cta_text,
+             duration_estimate=2.5,
+             segment_type="cta",
+             timing_marker="[CTA]"
+         ))
+
+         # Calculate totals
+         total_duration = sum(segment.duration_estimate for segment in segments)
+         word_count = sum(len(segment.text.split()) for segment in segments)
+
+         return GeneratedScript(
+             segments=segments,
+             total_duration=total_duration,
+             tagline=tagline,
+             voice_style=voice_style,
+             word_count=word_count,
+             raw_script=f"Template-based script for {brand}"
+         )
+
+     def generate_script(self, brand: str, structure: str, script_prompt: str,
+                         duration: int, voice_style: str, seed: int = 42) -> GeneratedScript:
+         """Generate a complete commercial script."""
+         try:
+             if self.llm_available:
+                 return self.generate_script_with_llm(brand, structure, script_prompt, duration, voice_style, seed)
+             else:
+                 logger.info("Using template-based script generation (LLM not available)")
+                 return self.generate_script_with_template(brand, structure, script_prompt, duration, voice_style, seed)
+         except Exception as e:
+             logger.error(f"Script generation failed: {e}")
+             logger.info("Falling back to template-based generation")
+             return self.generate_script_with_template(brand, structure, script_prompt, duration, voice_style, seed)
+
+     def suggest_scripts(self, structure: str, n: int = 6, seed: int = 0) -> List[str]:
+         """Generate multiple script suggestions based on structure."""
+         try:
+             suggestions = []
+             for i in range(n):
+                 script = self.generate_script(
+                     brand="YourBrand",
+                     structure=structure,
+                     script_prompt="Create an engaging hook",
+                     duration=10,
+                     voice_style="Announcer '80s",
+                     seed=seed + i
+                 )
+
+                 # Extract hook from first segment
+                 if script.segments:
+                     hook = script.segments[0].text
+                     suggestions.append(hook)
+                 else:
+                     suggestions.append("Back to '87 - the future is now!")
+
+             return suggestions
+
+         except Exception as e:
+             logger.warning(f"Script suggestion failed: {e}")
+             # Fallback to original random generation
+             return self._fallback_suggestions(structure, n, seed)
+
+     def _fallback_suggestions(self, structure: str, n: int, seed: int) -> List[str]:
+         """Fallback to original random script generation."""
+         random.seed(seed)
+
+         base = (structure or "").lower().strip()
+         ideas = []
+
+         for _ in range(n):
+             style = random.choice(["infomercial", "mall ad", "late-night", "newsflash", "arcade bumper"])
+             shot = random.choice(["neon grid", "CRT scanlines", "vaporwave sunset", "shopping mall", "boombox close-up"])
+             hook = random.choice([
+                 "Remember this sound?", "Back to '87.", "Deal of the decade.",
+                 "We paused time.", "Be kind, rewind your brand."
+             ])
+             idea = f"{hook} {style} with {shot}."
+
+             # Light correlation with structure
+             for kw in ["montage", "testimonial", "news", "unboxing", "before", "after", "countdown", "logo", "cta"]:
+                 if kw in base and kw not in idea:
+                     idea += f" Includes {kw}."
+
+             ideas.append(idea)
+
+         return ideas
+
+ def create_script_generator() -> LLMScriptGenerator:
+     """Factory function to create a script generator."""
+     return LLMScriptGenerator()
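
A short sketch of driving this generator directly. In an environment without the LLM weights (llm_available stays False), the same call exercises the template fallback path, so the snippet runs either way:

from llm_script_generator import create_script_generator

gen = create_script_generator()
script = gen.generate_script(
    brand="EceMotion Pictures",
    structure="Montage → Close-up → Logo stinger",
    script_prompt="Remember when technology was simple?",
    duration=10,
    voice_style="Announcer '80s",
    seed=42,
)
for seg in script.segments:
    print(seg.timing_marker, seg.text, f"(~{seg.duration_estimate:.1f}s)")
print("TAGLINE:", script.tagline)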
promptkit.py ADDED
@@ -0,0 +1,81 @@
+ """
+ Legacy promptkit module for EceMotion Pictures.
+ Maintained for backward compatibility.
+ """
+
+ from dataclasses import dataclass
+ from typing import Dict, List
+ import random
+
+ TAGLINES = [
+     "So retro, it's the future.",
+     "Pixels you can trust.",
+     "VHS vibes. Modern results.",
+     "Old-school cool. New-school sales.",
+     "EceMotion Pictures - Bringing the '80s back to life.",
+     "Your story, our vision, timeless memories.",
+ ]
+
+ VOICE_STYLES = {
+     "Announcer '80s": "A confident, upbeat 1980s TV announcer with warm AM-radio tone.",
+     "Mall PA": "Casual, slightly echoey mall public-address vibe.",
+     "Late Night": "Low energy, sly late-night infomercial style.",
+     "News Anchor": "Professional, authoritative news anchor delivery.",
+     "Infomercial": "Enthusiastic, persuasive infomercial host style.",
+     "Radio DJ": "Smooth, charismatic radio disc jockey voice.",
+ }
+
+ STRUCTURE_TEMPLATES = [
+     "Montage → Close-up → Logo stinger",
+     "Before/After → Feature highlight → CTA",
+     "Testimonial → B-roll → Price tag reveal",
+     "Unboxing → Demo → Deal countdown",
+     "Retro news bulletin → Product shot → Tagline",
+     "Opening hook → Problem/Solution → Call to action",
+     "Brand story → Product showcase → Final tagline",
+ ]
+
+ @dataclass
+ class AdPlan:
+     brand: str
+     structure: str
+     script_prompt: str
+     duration: int
+     voice_style: str
+     seed: int
+
+     def script(self) -> Dict[str, str]:
+         random.seed(self.seed)
+         tl = random.choice(TAGLINES)
+         structure = self.structure.strip() or random.choice(STRUCTURE_TEMPLATES)
+         # 4-beat VO using structure + script prompt
+         beats = [
+             f"HOOK: {self.brand} — {self.script_prompt}",
+             f"FLOW: {structure}",
+             "BENEFIT: Faster, simpler, cooler — like your favorite retro tech.",
+             f"CTA: Try {self.brand} today. {tl}",
+         ]
+         vo = " ".join([b.split(': ', 1)[1] for b in beats])
+         return {"lines": beats, "voiceover": vo, "tagline": tl}
+
+ def suggest_scripts(structure_prompt: str, n: int = 6, seed: int = 0) -> List[str]:
+     """Return n short script ideas correlated with the structure prompt."""
+     random.seed(seed)
+     base = (structure_prompt or "").lower().strip()
+     ideas = []
+     for _ in range(n):
+         style = random.choice(["infomercial", "mall ad", "late-night", "newsflash", "arcade bumper"])
+         shot = random.choice(["neon grid", "CRT scanlines", "vaporwave sunset", "shopping mall", "boombox close-up"])
+         hook = random.choice([
+             "Remember this sound?", "Back to '87.", "Deal of the decade.", "We paused time.", "Be kind, rewind your brand.",
+         ])
+         idea = f"{hook} {style} with {shot}."
+         # Light correlation: echo key words from structure prompt
+         for kw in ["montage", "testimonial", "news", "unboxing", "before", "after", "countdown", "logo", "cta"]:
+             if kw in base and kw not in idea:
+                 idea += f" Includes {kw}."
+         ideas.append(idea)
+     return ideas
+
+ def roll_script(structure_prompt: str, seed: int = 0) -> str:
+     return random.choice(suggest_scripts(structure_prompt, n=6, seed=seed))
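
For reference, the legacy API this module preserves looks like this in use (a sketch; the new LLMScriptGenerator supersedes it):

from promptkit import AdPlan, roll_script

plan = AdPlan(
    brand="YourBrand",
    structure="Unboxing → Demo → Deal countdown",
    script_prompt="Deal of the decade.",
    duration=10,
    voice_style="Late Night",
    seed=7,
)
out = plan.script()
print(out["voiceover"])         # the 4 beats flattened into one VO string
print(roll_script("unboxing"))  # one random, structure-correlated idea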
requirements.txt ADDED
@@ -0,0 +1,53 @@
+ # EceMotion Pictures - Production Requirements
+ # Tested and verified versions for HuggingFace Spaces
+
+ # Core ML/AI libraries - stable versions
+ gradio==4.44.0
+ transformers==4.44.2
+ accelerate==0.34.0
+ diffusers==0.31.0
+ safetensors==0.4.3
+ sentencepiece==0.2.0
+ huggingface_hub==0.24.6
+
+ # PyTorch ecosystem - stable versions
+ torch==2.4.0
+ torchvision==0.19.0
+ torchaudio==2.4.0
+
+ # Video processing - stable versions
+ moviepy==1.0.3
+ imageio[ffmpeg]==2.34.0
+ ffmpeg-python==0.2.0
+
+ # Audio processing - stable versions
+ soundfile==0.12.1
+ librosa==0.10.2
+ scipy==1.11.4
+
+ # Data processing
+ numpy==1.26.4
+ pandas==2.2.0
+
+ # Configuration and validation
+ pydantic==2.8.0
+ python-dotenv==1.0.0
+
+ # Logging and monitoring
+ loguru==0.7.2
+
+ # Additional dependencies for HuggingFace Spaces
+ Pillow==10.2.0
+ opencv-python==4.9.0.80
+ matplotlib==3.8.4
+ seaborn==0.13.2
+
+ # Development and testing (optional)
+ pytest==8.0.0
+ black==24.0.0
+ flake8==7.0.0
+
+ # System dependencies (for HuggingFace Spaces)
+ # These are typically pre-installed but listed for completeness
+ # ffmpeg (system package)
+ # git (system package)
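
A note on the pins: moviepy is held at 1.0.3, presumably because sync_manager.py imports from moviepy.editor, a module the moviepy 2.x line removed; likewise gradio==4.44.0 mirrors the sdk_version declared in README.md. Bumping either pin would mean updating the corresponding code or Space metadata as well.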
sync_manager.py ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Audio-Video Synchronization Manager for EceMotion Pictures.
3
+ Ensures frame-perfect alignment between generated audio and video content.
4
+ """
5
+
6
+ import os
7
+ import tempfile
8
+ import subprocess
9
+ import numpy as np
10
+ import logging
11
+ from typing import Tuple, Optional, Dict, Any
12
+ from pathlib import Path
13
+ import shutil
14
+
15
+ from config import SYNC_TOLERANCE_MS, FORCE_SYNC, AUDIO_SAMPLE_RATE
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ class SyncManager:
20
+ """Manages audio-video synchronization with frame-perfect accuracy."""
21
+
22
+ def __init__(self, tolerance_ms: int = SYNC_TOLERANCE_MS):
23
+ self.tolerance_ms = tolerance_ms
24
+ self.tolerance_s = tolerance_ms / 1000.0
25
+ self.ffmpeg_available = self._check_ffmpeg()
26
+
27
+ def _check_ffmpeg(self) -> bool:
28
+ """Check if ffmpeg is available."""
29
+ try:
30
+ subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
31
+ return True
32
+ except (subprocess.CalledProcessError, FileNotFoundError):
33
+ logger.warning("ffmpeg not found, using fallback methods")
34
+ return False
35
+
36
+ def calculate_video_duration(self, num_frames: int, fps: float) -> float:
37
+ """Calculate exact video duration from frame count and FPS."""
38
+ return num_frames / fps
39
+
40
+ def measure_audio_duration(self, audio_path: str) -> float:
41
+ """Measure actual duration of audio file."""
42
+ if not os.path.exists(audio_path):
43
+ raise FileNotFoundError(f"Audio file not found: {audio_path}")
44
+
45
+ if self.ffmpeg_available:
46
+ return self._measure_with_ffmpeg(audio_path)
47
+ else:
48
+ return self._measure_with_soundfile(audio_path)
49
+
50
+ def _measure_with_ffmpeg(self, audio_path: str) -> float:
51
+ """Measure duration using ffmpeg."""
52
+ try:
53
+ cmd = [
54
+ 'ffprobe', '-v', 'quiet', '-show_entries', 'format=duration',
55
+ '-of', 'csv=p=0', audio_path
56
+ ]
57
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
58
+ duration = float(result.stdout.strip())
59
+ logger.info(f"Audio duration (ffmpeg): {duration:.3f}s")
60
+ return duration
61
+ except (subprocess.CalledProcessError, ValueError) as e:
62
+ logger.error(f"Failed to measure audio duration with ffmpeg: {e}")
63
+ return self._measure_with_soundfile(audio_path)
64
+
65
+ def _measure_with_soundfile(self, audio_path: str) -> float:
66
+ """Measure duration using soundfile as fallback."""
67
+ try:
68
+ import soundfile as sf
69
+ info = sf.info(audio_path)
70
+ duration = info.duration
71
+ logger.info(f"Audio duration (soundfile): {duration:.3f}s")
72
+ return duration
73
+ except Exception as e:
74
+ logger.error(f"Failed to measure audio duration with soundfile: {e}")
75
+ # Last resort: estimate from file size
76
+ return self._estimate_duration_from_size(audio_path)
77
+
78
+ def _estimate_duration_from_size(self, audio_path: str) -> float:
79
+ """Estimate duration from file size (very rough estimate)."""
80
+ try:
81
+ file_size = os.path.getsize(audio_path)
82
+ # Rough estimate: 1MB ≈ 1 second for 128kbps audio
83
+ estimated_duration = file_size / (1024 * 1024)
84
+ logger.warning(f"Estimated audio duration from file size: {estimated_duration:.3f}s")
85
+ return estimated_duration
86
+ except Exception as e:
87
+ logger.error(f"Failed to estimate duration: {e}")
88
+ return 10.0 # Default fallback
89
+
90
+ def measure_video_duration(self, video_path: str) -> float:
91
+ """Measure actual duration of video file."""
92
+ if not os.path.exists(video_path):
93
+ raise FileNotFoundError(f"Video file not found: {video_path}")
94
+
95
+ if self.ffmpeg_available:
96
+ return self._measure_video_with_ffmpeg(video_path)
97
+ else:
98
+ return self._estimate_video_duration(video_path)
99
+
100
+ def _measure_video_with_ffmpeg(self, video_path: str) -> float:
101
+ """Measure video duration using ffmpeg."""
102
+ try:
103
+ cmd = [
104
+ 'ffprobe', '-v', 'quiet', '-show_entries', 'format=duration',
105
+ '-of', 'csv=p=0', video_path
106
+ ]
107
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
108
+ duration = float(result.stdout.strip())
109
+ logger.info(f"Video duration (ffmpeg): {duration:.3f}s")
110
+ return duration
111
+ except (subprocess.CalledProcessError, ValueError) as e:
112
+ logger.error(f"Failed to measure video duration with ffmpeg: {e}")
113
+ return self._estimate_video_duration(video_path)
114
+
115
+ def _estimate_video_duration(self, video_path: str) -> float:
116
+ """Estimate video duration (fallback method)."""
117
+ try:
118
+ # Try to get duration from filename or use default
119
+ filename = os.path.basename(video_path)
120
+ if '_' in filename:
121
+ # Try to extract duration from filename like "video_10s.mp4"
122
+ parts = filename.split('_')
123
+ for part in parts:
124
+ if 's' in part:
125
+ try:
126
+ duration = float(part.replace('s', ''))
127
+ logger.info(f"Estimated video duration from filename: {duration:.3f}s")
128
+ return duration
129
+ except ValueError:
130
+ continue
131
+
132
+ # Default fallback
133
+ logger.warning("Using default video duration estimate: 10.0s")
134
+ return 10.0
135
+ except Exception as e:
136
+ logger.error(f"Failed to estimate video duration: {e}")
137
+ return 10.0
138
+
139
+ def adjust_audio_to_video(self, audio_path: str, target_duration: float,
140
+ output_path: str) -> str:
141
+ """Adjust audio duration to match video duration."""
142
+ if self.ffmpeg_available:
143
+ return self._adjust_audio_with_ffmpeg(audio_path, target_duration, output_path)
144
+ else:
145
+ return self._adjust_audio_with_soundfile(audio_path, target_duration, output_path)
146
+
147
+ def _adjust_audio_with_ffmpeg(self, audio_path: str, target_duration: float,
148
+ output_path: str) -> str:
149
+ """Adjust audio using ffmpeg."""
150
+ try:
151
+ cmd = [
152
+ 'ffmpeg', '-i', audio_path, '-t', str(target_duration),
153
+ '-af', 'apad', '-c:a', 'pcm_s16le', '-y', output_path
154
+ ]
155
+ subprocess.run(cmd, check=True, capture_output=True)
156
+ logger.info(f"Adjusted audio to {target_duration:.3f}s using ffmpeg")
157
+ return output_path
158
+ except subprocess.CalledProcessError as e:
159
+ logger.error(f"Failed to adjust audio with ffmpeg: {e}")
160
+ return self._adjust_audio_with_soundfile(audio_path, target_duration, output_path)
161
+
162
+ def _adjust_audio_with_soundfile(self, audio_path: str, target_duration: float,
163
+ output_path: str) -> str:
164
+ """Adjust audio using soundfile (fallback)."""
165
+ try:
166
+ import soundfile as sf
167
+
168
+ # Read audio
169
+ audio_data, sample_rate = sf.read(audio_path)
170
+
171
+ # Calculate target samples
172
+ target_samples = int(target_duration * sample_rate)
173
+
174
+ if len(audio_data) < target_samples:
175
+ # Pad with silence
176
+ padding = np.zeros(target_samples - len(audio_data))
177
+ if len(audio_data.shape) > 1: # Stereo
178
+ padding = np.zeros((target_samples - len(audio_data), audio_data.shape[1]))
179
+ adjusted_audio = np.concatenate([audio_data, padding])
180
+ else:
181
+ # Trim to target length
182
+ adjusted_audio = audio_data[:target_samples]
183
+
184
+ # Write adjusted audio
185
+ sf.write(output_path, adjusted_audio, sample_rate)
186
+ logger.info(f"Adjusted audio to {target_duration:.3f}s using soundfile")
187
+ return output_path
188
+
189
+ except Exception as e:
190
+ logger.error(f"Failed to adjust audio with soundfile: {e}")
191
+ # Last resort: just copy the file
192
+ shutil.copy2(audio_path, output_path)
193
+ return output_path
194
+
195
+ def adjust_video_to_audio(self, video_path: str, target_duration: float,
196
+ output_path: str) -> str:
197
+ """Adjust video duration to match audio duration."""
198
+ if self.ffmpeg_available:
199
+ return self._adjust_video_with_ffmpeg(video_path, target_duration, output_path)
200
+ else:
201
+ # For video, we can't easily adjust without ffmpeg, so just copy
202
+ shutil.copy2(video_path, output_path)
203
+ return output_path
204
+
205
+ def _adjust_video_with_ffmpeg(self, video_path: str, target_duration: float,
206
+ output_path: str) -> str:
207
+ """Adjust video using ffmpeg."""
208
+ try:
209
+ cmd = [
210
+ 'ffmpeg', '-i', video_path, '-t', str(target_duration),
211
+ '-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-y', output_path
212
+ ]
213
+ subprocess.run(cmd, check=True, capture_output=True)
214
+ logger.info(f"Adjusted video to {target_duration:.3f}s using ffmpeg")
215
+ return output_path
216
+ except subprocess.CalledProcessError as e:
217
+ logger.error(f"Failed to adjust video with ffmpeg: {e}")
218
+ # Fallback: just copy
219
+ shutil.copy2(video_path, output_path)
220
+ return output_path
221
+
222
+ def validate_sync(self, video_path: str, audio_path: str) -> Tuple[bool, float]:
223
+ """Validate that audio and video are properly synchronized."""
224
+ try:
225
+ video_duration = self.measure_video_duration(video_path)
226
+ audio_duration = self.measure_audio_duration(audio_path)
227
+
228
+ duration_diff = abs(video_duration - audio_duration)
229
+ is_synced = duration_diff <= self.tolerance_s
230
+
231
+ logger.info(f"Sync validation: video={video_duration:.3f}s, "
232
+ f"audio={audio_duration:.3f}s, diff={duration_diff:.3f}s, "
233
+ f"synced={is_synced}")
234
+
235
+ return is_synced, duration_diff
236
+
237
+ except Exception as e:
238
+ logger.error(f"Sync validation failed: {e}")
239
+ return False, float('inf')
240
+
241
+     def synchronize_media(self, video_path: str, audio_path: str,
+                           output_path: str, prefer_audio_duration: bool = True) -> str:
+         """
+         Synchronize audio and video with frame-perfect accuracy.
+         """
+         try:
+             # Measure durations
+             video_duration = self.measure_video_duration(video_path)
+             audio_duration = self.measure_audio_duration(audio_path)
+
+             duration_diff = abs(video_duration - audio_duration)
+
+             # Check if already synchronized
+             if duration_diff <= self.tolerance_s:
+                 logger.info("Media already synchronized, copying to output")
+                 self._copy_media(video_path, audio_path, output_path)
+                 return output_path
+
+             # Determine target duration
+             if prefer_audio_duration:
+                 target_duration = audio_duration
+                 logger.info(f"Adjusting video to match audio duration: {target_duration:.3f}s")
+             else:
+                 target_duration = video_duration
+                 logger.info(f"Adjusting audio to match video duration: {target_duration:.3f}s")
+
+             # Create temporary files for the adjusted streams
+             with tempfile.TemporaryDirectory() as temp_dir:
+                 temp_video = os.path.join(temp_dir, "temp_video.mp4")
+                 temp_audio = os.path.join(temp_dir, "temp_audio.wav")
+
+                 # Adjust whichever stream needs to change
+                 if prefer_audio_duration:
+                     self.adjust_video_to_audio(video_path, target_duration, temp_video)
+                     temp_audio = audio_path  # Use original audio
+                 else:
+                     self.adjust_audio_to_video(audio_path, target_duration, temp_audio)
+                     temp_video = video_path  # Use original video
+
+                 # Mux synchronized media (inside the with-block so temp files still exist)
+                 self._mux_media(temp_video, temp_audio, output_path)
+
+             # Validate final sync by comparing the muxed file's video and audio streams
+             is_synced, final_diff = self.validate_sync(output_path, output_path)
+             if not is_synced and FORCE_SYNC:
+                 logger.warning(f"Final sync validation failed with diff {final_diff:.3f}s")
+             else:
+                 logger.info("Media successfully synchronized")
+
+             return output_path
+
+         except Exception as e:
+             logger.error(f"Synchronization failed: {e}")
+             # Fallback: just copy video without audio
+             shutil.copy2(video_path, output_path)
+             return output_path
+
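For orientation, here is a minimal, hypothetical usage sketch of the synchronization flow above (not part of the diff; it assumes the `create_sync_manager()` factory defined at the end of this file, and all file names are placeholders):

    # Hedged example: trim/pad the video so it fits the narration.
    from sync_manager import create_sync_manager

    sync = create_sync_manager()
    final = sync.synchronize_media(
        video_path="spot_video.mp4",    # raw generated video
        audio_path="spot_audio.wav",    # generated voiceover + music bed
        output_path="spot_final.mp4",
        prefer_audio_duration=True,
    )
    ok, diff_s = sync.validate_sync(final, final)
    print(f"synced={ok}, residual drift={diff_s:.3f}s")
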
+     def _copy_media(self, video_path: str, audio_path: str, output_path: str):
+         """Copy and mux media without duration adjustment."""
+         self._mux_media(video_path, audio_path, output_path)
+
+     def _mux_media(self, video_path: str, audio_path: str, output_path: str):
+         """Mux video and audio with precise timing."""
+         if self.ffmpeg_available:
+             self._mux_with_ffmpeg(video_path, audio_path, output_path)
+         else:
+             self._mux_with_moviepy(video_path, audio_path, output_path)
+
+     def _mux_with_ffmpeg(self, video_path: str, audio_path: str, output_path: str):
+         """Mux using ffmpeg."""
+         try:
+             cmd = [
+                 'ffmpeg', '-i', video_path, '-i', audio_path,
+                 '-c:v', 'copy', '-c:a', 'aac', '-b:a', '128k',
+                 '-shortest', '-fflags', '+shortest',
+                 '-movflags', '+faststart', '-y', output_path
+             ]
+             subprocess.run(cmd, check=True, capture_output=True)
+             logger.info("Media successfully muxed with ffmpeg")
+         except subprocess.CalledProcessError as e:
+             logger.error(f"Media muxing with ffmpeg failed: {e}")
+             self._mux_with_moviepy(video_path, audio_path, output_path)
+
+     def _mux_with_moviepy(self, video_path: str, audio_path: str, output_path: str):
+         """Mux using moviepy (fallback)."""
+         try:
+             from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips
+
+             # Load video and audio
+             video = VideoFileClip(video_path)
+             audio = AudioFileClip(audio_path)
+
+             # Set audio duration to match video
+             if audio.duration > video.duration:
+                 audio = audio.subclip(0, video.duration)
+             elif audio.duration < video.duration:
+                 # Pad audio with silence (concatenate_audioclips is a module-level
+                 # moviepy function, not a clip method)
+                 from moviepy.audio.AudioClip import AudioClip
+                 silence = AudioClip(lambda t: 0, duration=video.duration - audio.duration)
+                 audio = concatenate_audioclips([audio, silence])
+
+             # Combine and write
+             final_video = video.set_audio(audio)
+             final_video.write_videofile(
+                 output_path,
+                 codec='libx264',
+                 audio_codec='aac',
+                 temp_audiofile='temp-audio.m4a',
+                 remove_temp=True,
+                 verbose=False,
+                 logger=None
+             )
+
+             # Clean up
+             video.close()
+             audio.close()
+             final_video.close()
+
+             logger.info("Media successfully muxed with moviepy")
+
+         except Exception as e:
+             logger.error(f"Media muxing with moviepy failed: {e}")
+             # Last resort: just copy video
+             shutil.copy2(video_path, output_path)
+
+     def get_optimal_frame_count(self, target_duration: float, fps: float) -> int:
+         """Calculate the frame count for a target duration, clamped to model limits."""
+         frame_count = int(target_duration * fps)
+         # Keep the count in a sane range: 8-64 frames is 1-8 seconds at 8 fps
+         frame_count = max(8, min(frame_count, 64))
+         return frame_count
+
+     def estimate_audio_duration(self, text: str, words_per_minute: int = 150) -> float:
+         """Estimate speech duration in seconds from the script's word count."""
+         word_count = len(text.split())
+         duration_minutes = word_count / words_per_minute
+         return duration_minutes * 60.0  # Convert to seconds
+
+ def create_sync_manager() -> SyncManager:
+     """Factory function to create a SyncManager instance."""
+     return SyncManager()
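
A quick worked example of the arithmetic in `estimate_audio_duration` and `get_optimal_frame_count` (illustrative numbers, assuming a `SyncManager` instance named `sync`):

    script = " ".join(["word"] * 45)              # a 45-word voiceover
    secs = sync.estimate_audio_duration(script)   # 45 / 150 wpm * 60 = 18.0 s
    frames = sync.get_optimal_frame_count(secs, fps=8.0)
    # 18.0 s * 8 fps = 144 frames, clamped to the 64-frame cap (8 s of video),
    # so the video side comes out shorter than the narration and
    # synchronize_media will trim or pad accordingly.
    print(secs, frames)  # 18.0 64
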
utils_audio.py ADDED
@@ -0,0 +1,292 @@
+ """
+ Audio processing utilities for EceMotion Pictures.
+ Enhanced text-to-speech generation with robust error handling and fallbacks.
+ """
+
+ import numpy as np
+ import logging
+ import os
+ from typing import Tuple, Optional, Dict, Any
+
+ from config import (
+     MODEL_AUDIO, MODEL_CONFIGS, AUDIO_SAMPLE_RATE, get_device, get_safe_model_name
+ )
+
+ logger = logging.getLogger(__name__)
+
+ # Global model cache
+ _tts_pipe = None
+ _current_tts_model = None
+
+ def get_tts_pipe(model_name: str = MODEL_AUDIO, device: str = None):
+     """Get or create TTS pipeline with lazy loading and model switching."""
+     global _tts_pipe, _current_tts_model
+
+     if device is None:
+         device = get_device()
+
+     # Use safe model name
+     safe_model_name = get_safe_model_name(model_name, "audio")
+
+     if _tts_pipe is None or _current_tts_model != safe_model_name:
+         logger.info(f"Loading TTS model: {safe_model_name}")
+
+         try:
+             if "f5-tts" in safe_model_name.lower():
+                 # Try F5-TTS first
+                 _tts_pipe = _load_f5_tts(safe_model_name, device)
+             else:
+                 # Use standard TTS pipeline
+                 _tts_pipe = _load_standard_tts(safe_model_name, device)
+
+             if _tts_pipe is not None:
+                 _current_tts_model = safe_model_name
+                 logger.info(f"TTS model {safe_model_name} loaded successfully")
+             else:
+                 raise RuntimeError("Failed to load any TTS model")
+
+         except Exception as e:
+             logger.error(f"Failed to load {safe_model_name}: {e}")
+             # Fallback to original model
+             _tts_pipe = _load_standard_tts("parler-tts/parler-tts-mini-v1", device)
+             _current_tts_model = "parler-tts/parler-tts-mini-v1"
+
+     return _tts_pipe
+
+ def _load_f5_tts(model_name: str, device: str):
+     """Load F5-TTS model."""
+     try:
+         from transformers import pipeline
+
+         pipe = pipeline(
+             "text-to-speech",
+             model=model_name,
+             torch_dtype="auto",
+             device_map=device if device == "cuda" else None
+         )
+
+         return pipe
+
+     except Exception as e:
+         logger.error(f"Failed to load F5-TTS: {e}")
+         return None
+
+ def _load_standard_tts(model_name: str, device: str):
+     """Load standard TTS model."""
+     try:
+         from transformers import pipeline
+
+         # transformers pipelines take the device at construction time;
+         # a pipeline object has no .to() method
+         pipe = pipeline(
+             "text-to-speech",
+             model=model_name,
+             torch_dtype="auto",
+             device=0 if device == "cuda" else -1
+         )
+
+         return pipe
+
+     except Exception as e:
+         logger.error(f"Failed to load standard TTS: {e}")
+         return None
+
+ def synth_voice(text: str, voice_prompt: str, sr: int = AUDIO_SAMPLE_RATE,
+                 model_name: str = MODEL_AUDIO, device: str = None) -> Tuple[int, np.ndarray]:
+     """
+     Generate speech from text with enhanced TTS support.
+     """
+     if device is None:
+         device = get_device()
+
+     tts = get_tts_pipe(model_name, device)
+     model_config = MODEL_CONFIGS.get(_current_tts_model, {})
+
+     # Validate text length
+     max_length = model_config.get("max_text_length", 500)
+     min_length = model_config.get("min_text_length", 10)
+
+     if len(text) > max_length:
+         logger.warning(f"Text too long ({len(text)} chars), truncating to {max_length}")
+         text = text[:max_length]
+     elif len(text) < min_length:
+         logger.warning(f"Text too short ({len(text)} chars), padding")
+         text = text + " " * (min_length - len(text))
+
+     try:
+         if "f5-tts" in _current_tts_model.lower():
+             # F5-TTS specific generation
+             result = tts(
+                 text=text,
+                 voice_preset=voice_prompt,
+                 return_tensors="pt"
+             )
+             wav = result["audio"].numpy().flatten()
+         else:
+             # Standard pipeline (Parler-TTS, etc.)
+             result = tts({"text": text, "voice_preset": voice_prompt})
+             wav = result["audio"]
+
+         # Ensure proper format
+         if hasattr(wav, 'numpy'):
+             wav = wav.numpy()
+         elif hasattr(wav, 'detach'):
+             wav = wav.detach().numpy()
+
+         # Normalize audio
+         wav = normalize_audio(wav)
+
+         # Resample if needed
+         if sr != AUDIO_SAMPLE_RATE:
+             wav = _resample_audio(wav, AUDIO_SAMPLE_RATE, sr)
+
+         logger.info(f"Generated audio: {len(wav)/sr:.2f}s at {sr}Hz")
+         return sr, wav.astype(np.float32)
+
+     except Exception as e:
+         logger.error(f"Voice synthesis failed: {e}")
+         # Return fallback audio
+         return _create_fallback_audio(text, sr)
+
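For orientation, a hedged sketch of calling `synth_voice` and saving the result with `write_wav` (defined later in this file); the voice description string is purely illustrative:

    # Hypothetical call; returns sample rate and a float32 waveform.
    sr, wav = synth_voice(
        text="Big hair. Bigger savings. Call now!",
        voice_prompt="energetic 1980s TV announcer, compressed broadcast tone",
    )
    write_wav("voiceover.wav", sr, wav)
    print(f"{len(wav) / sr:.2f}s of narration at {sr} Hz")
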
+ def _resample_audio(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
+     """Resample audio using available methods."""
+     try:
+         import librosa
+         return librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)
+     except ImportError:
+         # Simple linear-interpolation resampling without librosa
+         ratio = target_sr / orig_sr
+         new_length = int(len(audio) * ratio)
+         return np.interp(
+             np.linspace(0, len(audio), new_length),
+             np.arange(len(audio)),
+             audio
+         )
+
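The `np.interp` fallback above resamples by index ratio, so a 24 kHz signal resampled to 16 kHz keeps its duration but lands on two-thirds as many sample points; a small sanity check:

    import numpy as np

    one_second_24k = np.sin(2 * np.pi * 440 * np.linspace(0, 1.0, 24000, endpoint=False))
    resampled = _resample_audio(one_second_24k, orig_sr=24000, target_sr=16000)
    print(len(resampled))  # 16000 samples -> still ~1.0 s of audio at 16 kHz
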
+ def _create_fallback_audio(text: str, sr: int) -> Tuple[int, np.ndarray]:
+     """Create fallback audio when TTS fails."""
+     try:
+         # Create a simple tone based on text length
+         duration = max(1.0, len(text) / 20.0)  # Rough estimate
+         t = np.linspace(0, duration, int(sr * duration), endpoint=False)
+
+         # Generate a simple tone
+         frequency = 440.0  # A4 note
+         wav = 0.1 * np.sin(2 * np.pi * frequency * t)
+
+         # Add some variation
+         wav += 0.05 * np.sin(2 * np.pi * frequency * 1.5 * t)
+
+         logger.info(f"Created fallback audio: {duration:.2f}s")
+         return sr, wav.astype(np.float32)
+
+     except Exception as e:
+         logger.error(f"Failed to create fallback audio: {e}")
+         # Last resort: silence
+         duration = 2.0
+         wav = np.zeros(int(sr * duration))
+         return sr, wav.astype(np.float32)
+
+ def normalize_audio(audio: np.ndarray, target_lufs: float = -23.0) -> np.ndarray:
+     """Normalize audio toward broadcast levels.
+
+     Note: target_lufs is accepted for API compatibility, but this implementation
+     performs simple peak normalization plus gentle compression, not true LUFS
+     loudness normalization.
+     """
+     # Simple peak normalization first
+     if np.max(np.abs(audio)) > 0:
+         audio = audio / np.max(np.abs(audio)) * 0.95
+
+     # Apply gentle compression
+     audio = apply_compression(audio)
+
+     return audio
+
+ def apply_compression(audio: np.ndarray, ratio: float = 3.0, threshold: float = 0.7) -> np.ndarray:
+     """Apply gentle compression for broadcast quality."""
+     # Simple hard-knee compression: samples above the threshold are scaled down
+     compressed = np.copy(audio)
+
+     # Above threshold, reduce the excess by the given ratio
+     above_threshold = np.abs(audio) > threshold
+     compressed[above_threshold] = np.sign(audio[above_threshold]) * (
+         threshold + (np.abs(audio[above_threshold]) - threshold) / ratio
+     )
+
+     return compressed
+
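Concretely, with the defaults (threshold 0.7, ratio 3.0) a 0.9 peak compresses to 0.7 + (0.9 - 0.7) / 3 ≈ 0.767, while samples at or below the threshold pass through unchanged:

    import numpy as np

    burst = np.array([0.2, 0.7, 0.9, -1.0], dtype=np.float32)
    print(apply_compression(burst))  # [ 0.2, 0.7, ~0.767, ~-0.8 ]
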
+ def retro_bed(duration_s: float, sr: int = AUDIO_SAMPLE_RATE, bpm: int = 92):
+     """Generate retro synth background music.
+
+     Note: bpm is accepted for future tempo control; the current chord pattern
+     simply splits the duration into four equal segments.
+     """
+     try:
+         t = np.linspace(0, duration_s, int(sr * duration_s), endpoint=False)
+
+         # Chord progression root frequencies (A minor style)
+         freqs = [220.0, 174.61, 196.0, 146.83]
+         seg_len = int(len(t) / len(freqs)) if len(freqs) else len(t)
+         sig = np.zeros_like(t)
+
+         for i, f0 in enumerate(freqs):
+             tri_t = t[i * seg_len:(i + 1) * seg_len]
+             tri = 2 * np.abs(2 * ((tri_t * f0) % 1) - 1) - 1
+             sig[i * seg_len:(i + 1) * seg_len] = 0.15 * tri
+
+         # Add tape noise
+         noise = 0.01 * np.random.randn(len(t))
+         bed = sig + noise
+
+         # Apply gentle lowpass filter
+         try:
+             from scipy import signal
+             b, a = signal.butter(3, 3000, 'low', fs=sr)
+             bed = signal.lfilter(b, a, bed)
+         except ImportError:
+             # Simple averaging filter if scipy not available
+             bed = np.convolve(bed, np.ones(5)/5, mode='same')
+
+         return sr, bed.astype(np.float32)
+
+     except Exception as e:
+         logger.error(f"Failed to generate retro bed: {e}")
+         # Return silence
+         silence = np.zeros(int(sr * duration_s))
+         return sr, silence.astype(np.float32)
+
+ def mix_to_stereo(sr1, a, sr2, b, bed_gain=0.5):
+     """Mix two mono signals to stereo."""
+     assert sr1 == sr2, "Sample rates must match"
+
+     n = max(len(a), len(b))
+
+     def pad(x):
+         if len(x) < n:
+             if len(x.shape) > 1:  # Stereo
+                 padding = np.zeros((n - len(x), x.shape[1]))
+             else:  # Mono
+                 padding = np.zeros(n - len(x))
+             x = np.concatenate([x, padding])
+         return x
+
+     a = pad(a)
+     b = pad(b)
+
+     # Attenuate the right channel slightly for a wider, tape-like image
+     left = a + bed_gain * b
+     right = a * 0.9 + bed_gain * 0.9 * b
+
+     stereo = np.stack([left, right], axis=1)
+
+     return sr1, np.clip(stereo, -1.0, 1.0)
+
+ def write_wav(path: str, sr: int, wav: np.ndarray):
+     """Write audio to WAV file."""
+     try:
+         import soundfile as sf
+         sf.write(path, wav, sr)
+     except ImportError:
+         # Fallback using scipy
+         try:
+             from scipy.io import wavfile
+             # Convert to 16-bit
+             wav_16bit = (wav * 32767).astype(np.int16)
+             wavfile.write(path, sr, wav_16bit)
+         except ImportError:
+             logger.error("No audio writing library available (soundfile or scipy)")
+             raise RuntimeError("Cannot write audio file - no audio library available")
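
Taken together, these helpers form a small soundtrack pipeline; a hedged end-to-end sketch (the script text, voice prompt, and gain are illustrative):

    # Build a narrated track over the retro bed and write it out.
    sr_v, voice = synth_voice("Totally rad deals, this weekend only!", "warm retro announcer")
    sr_b, bed = retro_bed(duration_s=len(voice) / sr_v, sr=sr_v)
    sr, stereo = mix_to_stereo(sr_v, voice, sr_b, bed, bed_gain=0.4)
    write_wav("commercial_audio.wav", sr, stereo)
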
utils_video.py ADDED
@@ -0,0 +1,336 @@
+ """
+ Video processing utilities for EceMotion Pictures.
+ Enhanced text-to-video generation with robust error handling and fallbacks.
+ """
+
+ import numpy as np
+ import logging
+ import os
+ import shutil
+ from typing import Optional, Tuple, List
+ from pathlib import Path
+
+ from config import (
+     MODEL_VIDEO, MODEL_CONFIGS, get_device, VHS_INTENSITY, SCANLINE_OPACITY,
+     CHROMATIC_ABERRATION, FILM_GRAIN, get_safe_model_name
+ )
+
+ logger = logging.getLogger(__name__)
+
+ # Global model cache
+ t2v_pipe = None
+ current_model = None
+
+ def get_t2v_pipe(device: str, model_name: str = MODEL_VIDEO):
+     """Get or create T2V pipeline with lazy loading and model switching."""
+     global t2v_pipe, current_model
+
+     # Use safe model name
+     safe_model_name = get_safe_model_name(model_name, "video")
+
+     if t2v_pipe is None or current_model != safe_model_name:
+         logger.info(f"Loading T2V model: {safe_model_name}")
+
+         try:
+             if "cogvideox" in safe_model_name.lower():
+                 # Try CogVideoX first
+                 t2v_pipe = _load_cogvideox(safe_model_name, device)
+             else:
+                 # Use standard diffusers pipeline
+                 t2v_pipe = _load_standard_t2v(safe_model_name, device)
+
+             if t2v_pipe is not None:
+                 current_model = safe_model_name
+                 logger.info(f"T2V model {safe_model_name} loaded successfully")
+             else:
+                 raise RuntimeError("Failed to load any T2V model")
+
+         except Exception as e:
+             logger.error(f"Failed to load {safe_model_name}: {e}")
+             # Fallback to original model
+             t2v_pipe = _load_standard_t2v("damo-vilab/text-to-video-ms-1.7b", device)
+             current_model = "damo-vilab/text-to-video-ms-1.7b"
+
+     return t2v_pipe
+
+ def _load_cogvideox(model_name: str, device: str):
+     """Load CogVideoX model."""
+     try:
+         from diffusers import CogVideoXPipeline
+
+         pipe = CogVideoXPipeline.from_pretrained(
+             model_name,
+             torch_dtype="auto",
+             trust_remote_code=True
+         )
+
+         if device == "cuda":
+             pipe = pipe.to(device)
+
+         return pipe
+
+     except Exception as e:
+         logger.error(f"Failed to load CogVideoX: {e}")
+         return None
+
+ def _load_standard_t2v(model_name: str, device: str):
+     """Load standard T2V model."""
+     try:
+         from diffusers import TextToVideoSDPipeline
+
+         pipe = TextToVideoSDPipeline.from_pretrained(
+             model_name,
+             torch_dtype="auto"
+         )
+
+         if device == "cuda":
+             pipe = pipe.to(device)
+
+         return pipe
+
+     except Exception as e:
+         logger.error(f"Failed to load standard T2V: {e}")
+         return None
+
+ def synth_t2v(prompt: str, seed: int, num_frames: int = 32, fps: int = 8,
+               device: str = None, model_name: str = MODEL_VIDEO):
+     """
+     Generate text-to-video with enhanced model support and frame control.
+     """
+     if device is None:
+         device = get_device()
+
+     pipe = get_t2v_pipe(device, model_name)
+     model_config = MODEL_CONFIGS.get(current_model, {})
+
+     # Validate frame count against model limits
+     max_frames = model_config.get("max_frames", 32)
+     min_frames = model_config.get("min_frames", 8)
+     num_frames = max(min_frames, min(num_frames, max_frames))
+
+     logger.info(f"Generating {num_frames} frames at {fps}fps with {current_model}")
+
+     try:
+         # Set up generator
+         import torch
+         generator = torch.Generator(device=device).manual_seed(seed)
+
+         # Generate frames based on model type
+         if "cogvideox" in current_model.lower():
+             # CogVideoX specific generation
+             result = pipe(
+                 prompt=prompt,
+                 num_frames=num_frames,
+                 generator=generator,
+                 guidance_scale=7.5,
+                 num_inference_steps=20
+             )
+         else:
+             # Standard pipeline
+             result = pipe(
+                 prompt=prompt,
+                 num_frames=num_frames,
+                 generator=generator
+             )
+
+         frames = result.frames
+         # Some diffusers versions return a batch (a list of frame lists); unwrap it
+         if len(frames) == 1 and isinstance(frames[0], (list, tuple)):
+             frames = frames[0]
+
+         # Convert to numpy arrays and create clip
+         frame_arrays = [np.array(frame) for frame in frames]
+
+         # Create clip using moviepy
+         from moviepy.editor import ImageSequenceClip
+         clip = ImageSequenceClip(frame_arrays, fps=fps)
+
+         logger.info(f"Generated video clip: {clip.duration:.2f}s, {len(frame_arrays)} frames")
+         return clip
+
+     except Exception as e:
+         logger.error(f"Video generation failed: {e}")
+         # Return a simple fallback clip
+         return _create_fallback_clip(prompt, num_frames, fps)
+
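A hedged usage sketch for `synth_t2v` (prompt and seed are arbitrary; writing the clip out uses moviepy's standard `write_videofile`):

    clip = synth_t2v(
        prompt="1980s TV commercial, neon grid, chrome logo, VHS look",
        seed=1234,
        num_frames=32,   # 4 s at 8 fps, within the model clamp
        fps=8,
    )
    clip.write_videofile("raw_spot.mp4", fps=8, codec="libx264", audio=False)
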
+ def _create_fallback_clip(prompt: str, num_frames: int, fps: int):
+     """Create a simple fallback clip when video generation fails."""
+     try:
+         from moviepy.editor import ColorClip, TextClip, CompositeVideoClip
+
+         # Create a simple colored background
+         background = ColorClip(size=(640, 480), color=(100, 50, 200), duration=num_frames/fps)
+
+         # Add text overlay
+         text = TextClip(
+             prompt[:50] + "..." if len(prompt) > 50 else prompt,
+             fontsize=24,
+             color='white',
+             font='Arial-Bold'
+         ).set_position('center').set_duration(num_frames/fps)
+
+         # Composite the clips
+         clip = CompositeVideoClip([background, text])
+
+         logger.info(f"Created fallback clip: {clip.duration:.2f}s")
+         return clip
+
+     except Exception as e:
+         logger.error(f"Failed to create fallback clip: {e}")
+         # Last resort: create a simple color clip
+         from moviepy.editor import ColorClip
+         return ColorClip(size=(640, 480), color=(100, 50, 200), duration=5.0)
+
+ def apply_retro_filters(input_path: str, output_path: str, intensity: float = VHS_INTENSITY):
+     """
+     Apply authentic VHS/CRT effects with enhanced visual artifacts.
+     """
+     logger.info(f"Applying retro filters with intensity {intensity}")
+
+     # Check if ffmpeg is available
+     if not _check_ffmpeg():
+         logger.warning("ffmpeg not available, using simple filter")
+         _apply_simple_retro_filters(input_path, output_path)
+         return
+
+     try:
+         # Build filter chain for authentic VHS look
+         filters = []
+
+         # 1. Format conversion
+         filters.append('format=yuv420p')
+
+         # 2. Basic color grading for retro look
+         filters.append(f'hue=s={0.8 + 0.2 * intensity}')
+         filters.append(
+             f'eq=brightness={0.02 * intensity}:contrast={1.0 + 0.1 * intensity}'
+             f':saturation={1.0 + 0.2 * intensity}:gamma={1.0 - 0.05 * intensity}'
+         )
+
+         # 3. VHS tracking lines and noise
+         if intensity > 0.3:
+             filters.append(f'tblend=all_mode=difference:all_opacity={0.05 * intensity}')
+             filters.append(f'noise=alls={int(20 * intensity)}:allf=t')
+
+         # 4. Film grain
+         if FILM_GRAIN > 0:
+             grain = FILM_GRAIN * intensity
+             filters.append(f'noise=alls={int(15 * grain)}:allf=u')
+
+         # 5. Vignetting
+         filters.append(f'vignette=PI/4:{0.3 * intensity}')
+
+         # Apply the whole chain as a single -vf string; ffmpeg-python streams
+         # have no filter_complex() method, but output() accepts vf=
+         import ffmpeg
+
+         (
+             ffmpeg
+             .input(input_path)
+             .output(
+                 output_path,
+                 vf=','.join(filters),
+                 vcodec='libx264',
+                 pix_fmt='yuv420p',
+                 crf=20,  # Good quality
+                 preset='medium',
+                 movflags='+faststart'
+             )
+             .overwrite_output()
+             .run(quiet=True)
+         )
+         logger.info("Retro filters applied successfully")
+
+     except Exception as e:
+         logger.error(f"Failed to apply retro filters: {e}")
+         # Fallback to simple filter
+         _apply_simple_retro_filters(input_path, output_path)
+
+ def _check_ffmpeg() -> bool:
+     """Check if ffmpeg is available."""
+     try:
+         import subprocess
+         subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
+         return True
+     except (subprocess.CalledProcessError, FileNotFoundError):
+         return False
+
+ def _apply_simple_retro_filters(input_path: str, output_path: str):
+     """Fallback simple retro filter application."""
+     try:
+         import ffmpeg
+
+         (
+             ffmpeg
+             .input(input_path)
+             .filter('format', 'yuv420p')
+             .filter('tblend', all_mode='difference', all_opacity=0.05)
+             .filter('hue', s=0.9)
+             .filter('eq', brightness=0.02, contrast=1.05, saturation=1.1, gamma=0.98)
+             .filter('noise', alls=10)
+             .output(output_path, vcodec='libx264', pix_fmt='yuv420p', crf=20, movflags='+faststart')
+             .overwrite_output()
+             .run(quiet=True)
+         )
+         logger.info("Simple retro filters applied as fallback")
+     except Exception as e:
+         logger.error(f"Even simple retro filters failed: {e}")
+         # Just copy the file
+         shutil.copy2(input_path, output_path)
+
+ def mux_audio(video_in: str, audio_in: str, out_path: str):
+     """Mux video and audio with error handling."""
+     try:
+         if _check_ffmpeg():
+             _mux_with_ffmpeg(video_in, audio_in, out_path)
+         else:
+             _mux_with_moviepy(video_in, audio_in, out_path)
+     except Exception as e:
+         logger.error(f"Audio muxing failed: {e}")
+         # Fallback: just copy video
+         shutil.copy2(video_in, out_path)
+
+ def _mux_with_ffmpeg(video_in: str, audio_in: str, out_path: str):
+     """Mux using ffmpeg."""
+     import ffmpeg
+
+     # ffmpeg-python takes multiple inputs as separate streams passed to output();
+     # chaining .input() on a stream is not part of its API
+     video = ffmpeg.input(video_in)
+     audio = ffmpeg.input(audio_in)
+     (
+         ffmpeg
+         .output(video, audio, out_path, vcodec='copy', acodec='aac',
+                 audio_bitrate='128k', movflags='+faststart')
+         .overwrite_output()
+         .run(quiet=True)
+     )
+
+ def _mux_with_moviepy(video_in: str, audio_in: str, out_path: str):
+     """Mux using moviepy (fallback)."""
+     from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips
+
+     # Load video and audio
+     video = VideoFileClip(video_in)
+     audio = AudioFileClip(audio_in)
+
+     # Set audio duration to match video
+     if audio.duration > video.duration:
+         audio = audio.subclip(0, video.duration)
+     elif audio.duration < video.duration:
+         # Pad audio with silence (concatenate_audioclips is a module-level
+         # moviepy function, not a clip method)
+         from moviepy.audio.AudioClip import AudioClip
+         silence = AudioClip(lambda t: 0, duration=video.duration - audio.duration)
+         audio = concatenate_audioclips([audio, silence])
+
+     # Combine and write
+     final_video = video.set_audio(audio)
+     final_video.write_videofile(
+         out_path,
+         codec='libx264',
+         audio_codec='aac',
+         temp_audiofile='temp-audio.m4a',
+         remove_temp=True,
+         verbose=False,
+         logger=None
+     )
+
+     # Clean up
+     video.close()
+     audio.close()
+     final_video.close()
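
Finally, a hedged sketch of chaining the video utilities with the soundtrack produced by utils_audio.py (all file names are placeholders):

    # Generate, stylize, then attach the soundtrack.
    clip = synth_t2v("retro synthwave product shot", seed=7, num_frames=24, fps=8)
    clip.write_videofile("raw.mp4", fps=8, codec="libx264", audio=False)
    apply_retro_filters("raw.mp4", "retro.mp4", intensity=0.6)
    mux_audio("retro.mp4", "commercial_audio.wav", "final_spot.mp4")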