Upload 10 files
- LICENSE +21 -0
- README.md +9 -7
- app.py +515 -0
- config.py +190 -0
- llm_script_generator.py +388 -0
- promptkit.py +81 -0
- requirements.txt +53 -0
- sync_manager.py +381 -0
- utils_audio.py +292 -0
- utils_video.py +336 -0
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 EceMotion Pictures

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,14 +1,16 @@
 ---
 title: EceMotion Pictures
-emoji:
+emoji: 🎬🎤🤖
-colorFrom:
+colorFrom: purple
-colorTo:
+colorTo: pink
 sdk: gradio
-sdk_version:
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: mit
-short_description:
+short_description: 1980s style commercial with perfect audio-video sync.
 ---
-
-
+models:
+- damo-vilab/text-to-video-ms-1.7b
+- parler-tts/parler-tts-mini-v1
+- microsoft/DialoGPT-medium
app.py
ADDED
@@ -0,0 +1,515 @@
"""
EceMotion Pictures - Production Grade Commercial Generator
Advanced text-to-video commercial generator with perfect audio-video sync.
"""

import os
import tempfile
import logging
from typing import Optional, Tuple, Dict, Any
from pathlib import Path
import traceback

import gradio as gr
import numpy as np

# Import our enhanced modules
from config import (
    MODEL_VIDEO, MODEL_AUDIO, MODEL_LLM, MAX_DURATION, MIN_DURATION,
    DEFAULT_FPS, VOICE_STYLES, get_device, validate_config, log_config
)
from sync_manager import create_sync_manager
from llm_script_generator import create_script_generator
from utils_audio import synth_voice, retro_bed, mix_to_stereo, write_wav
from utils_video import synth_t2v, apply_retro_filters, mux_audio

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Initialize components
DEVICE = get_device()
sync_manager = create_sync_manager()
script_generator = create_script_generator()

# Validate configuration
if not validate_config():
    logger.error("Configuration validation failed")
    exit(1)

# Log configuration
log_config()

# Modern CSS for Gradio
CSS = """
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto;
}
.app-header {
    text-align: center;
    margin-bottom: 2rem;
}
.app-title {
    font-size: 2.5rem;
    font-weight: 700;
    background: linear-gradient(45deg, #ff6b6b, #4ecdc4);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 0.5rem;
}
.app-subtitle {
    opacity: 0.7;
    font-size: 1.1rem;
    color: #666;
}
.control-section {
    background: #f8f9fa;
    border-radius: 12px;
    padding: 1.5rem;
    margin-bottom: 1rem;
}
.output-section {
    background: #ffffff;
    border: 2px solid #e9ecef;
    border-radius: 12px;
    padding: 1.5rem;
}
.progress-info {
    background: #e3f2fd;
    border-left: 4px solid #2196f3;
    padding: 1rem;
    margin: 1rem 0;
    border-radius: 4px;
}
.error-info {
    background: #ffebee;
    border-left: 4px solid #f44336;
    padding: 1rem;
    margin: 1rem 0;
    border-radius: 4px;
}
"""

# Example configurations, ordered to match the gr.Examples inputs:
# brand, structure, script hook, voice, duration
EXAMPLES = [
    [
        "EceMotion Pictures",
        "Montage → Close-up → Logo stinger",
        "Remember when technology was simple?",
        "Announcer '80s",
        10,
    ],
    [
        "VaporWave Studios",
        "Before/After → Feature highlight → CTA",
        "The future is now, but it looks like the past",
        "Mall PA",
        8,
    ],
    [
        "Neon Dreams",
        "Unboxing → Demo → Deal countdown",
        "Step into the digital sunset",
        "Late Night",
        12,
    ],
]

def create_interface():
    """Create the modern Gradio interface."""

    with gr.Blocks(
        css=CSS,
        title="EceMotion Pictures",
        theme=gr.themes.Soft()
    ) as demo:

        # Header
        with gr.Row():
            gr.HTML("""
            <div class="app-header">
                <div class="app-title">🎬 EceMotion Pictures</div>
                <div class="app-subtitle">AI-Powered 1980s Style Commercial Generator</div>
            </div>
            """)

        # Main interface
        with gr.Row():
            # Left column - Controls
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### 🎯 Commercial Setup")

                    brand = gr.Textbox(
                        label="Brand Name",
                        placeholder="YourBrand™",
                        value="EceMotion Pictures",
                        info="Enter your brand name"
                    )

                    structure = gr.Textbox(
                        label="Commercial Structure",
                        placeholder="e.g., Montage → Close-up → Logo stinger",
                        value="Montage → Close-up → Logo stinger",
                        info="Define the flow of your commercial"
                    )

                    with gr.Row():
                        script_prompt = gr.Textbox(
                            label="Script Hook",
                            placeholder="Opening hook or idea",
                            value="Remember when technology was simple?",
                            scale=3
                        )
                        roll_btn = gr.Button("🎲", scale=1, size="sm")

                    duration = gr.Slider(
                        minimum=MIN_DURATION,
                        maximum=MAX_DURATION,
                        value=10,
                        step=1,
                        label="Duration (seconds)",
                        info=f"Between {MIN_DURATION}-{MAX_DURATION} seconds"
                    )

                with gr.Group():
                    gr.Markdown("### 🎤 Audio Settings")

                    voice = gr.Dropdown(
                        choices=list(VOICE_STYLES.keys()),
                        value="Announcer '80s",
                        label="Voice Style",
                        info="Choose the announcer style"
                    )

                    music = gr.Checkbox(
                        value=True,
                        label="Background Music",
                        info="Add retro synth jingle"
                    )

                with gr.Group():
                    gr.Markdown("### ⚙️ Advanced Settings")

                    with gr.Accordion("Model & Quality", open=False):
                        model_video = gr.Dropdown(
                            choices=["damo-vilab/text-to-video-ms-1.7b", "THUDM/CogVideoX-5b"],
                            value=MODEL_VIDEO,
                            label="Video Model",
                            info="Choose the text-to-video model"
                        )

                        model_audio = gr.Dropdown(
                            choices=["parler-tts/parler-tts-mini-v1", "SWivid/F5-TTS"],
                            value=MODEL_AUDIO,
                            label="Audio Model",
                            info="Choose the text-to-speech model"
                        )

                    with gr.Accordion("Retro Effects", open=False):
                        vhs_intensity = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.5,
                            step=0.1,
                            label="VHS Effect Intensity"
                        )

                        seed = gr.Number(
                            value=42,
                            precision=0,
                            label="Random Seed",
                            info="For reproducible results"
                        )

                # Generate button
                generate_btn = gr.Button(
                    "🎬 Generate Commercial",
                    variant="primary",
                    size="lg"
                )

            # Right column - Output
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### 📺 Generated Commercial")

                    # Progress tracking
                    progress_info = gr.HTML("""
                    <div class="progress-info">
                        <strong>Ready to generate!</strong><br>
                        Click the generate button to create your retro commercial.
                    </div>
                    """)

                    # Video output
                    output_video = gr.Video(
                        height=400,
                        label="Commercial Preview",
                        show_download_button=True
                    )

                    # Script output
                    output_script = gr.Textbox(
                        label="Generated Script",
                        lines=8,
                        max_lines=12,
                        show_copy_button=True
                    )

                    # Download section
                    with gr.Row():
                        download_btn = gr.DownloadButton(
                            "📥 Download Commercial",
                            variant="secondary"
                        )

        # Examples section
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 💡 Example Configurations")
                examples = gr.Examples(
                    examples=EXAMPLES,
                    inputs=[brand, structure, script_prompt, voice, duration],
                    label="Click to load example"
                )

        # Footer
        gr.Markdown("""
        <div style='text-align: center; opacity: 0.7; font-size: 0.9rem; margin-top: 2rem;'>
            <p>🎬 Powered by EceMotion Pictures • Perfect audio-video sync • Professional quality</p>
            <p>Models: Text-to-Video • Text-to-Speech • Enhanced VHS effects</p>
        </div>
        """)

        # Event handlers
        def roll_script_suggestion(structure_text: str, seed_val: int) -> str:
            """Generate script suggestions using LLM."""
            try:
                suggestions = script_generator.suggest_scripts(structure_text, n=1, seed=seed_val)
                return suggestions[0] if suggestions else "Back to '87 - the future is now!"
            except Exception as e:
                logger.error(f"Script suggestion failed: {e}")
                return "Back to '87 - the future is now!"

        def generate_commercial(
            brand_name: str,
            structure_text: str,
            script_text: str,
            duration_val: int,
            voice_style: str,
            music_enabled: bool,
            video_model: str,
            audio_model: str,
            vhs_intensity: float,
            seed_val: int
        ):
            """
            Generate a complete retro commercial with perfect sync.

            Yields (progress_html, video_path, script_text, download_path)
            tuples so Gradio can stream progress updates.
            """
            try:
                # Update progress
                progress_html = """
                <div class="progress-info">
                    <strong>🎬 Generating Commercial...</strong><br>
                    <div style="margin-top: 0.5rem;">
                        <div>📝 Generating script with AI...</div>
                    </div>
                </div>
                """
                yield progress_html, None, None, None

                # Generate script using LLM
                generated_script = script_generator.generate_script(
                    brand=brand_name or "Brand",
                    structure=structure_text or "Montage → Close-up → Logo",
                    script_prompt=script_text or "Back to '87",
                    duration=duration_val,
                    voice_style=voice_style,
                    seed=seed_val
                )

                # Update progress
                progress_html = """
                <div class="progress-info">
                    <strong>🎬 Generating Commercial...</strong><br>
                    <div style="margin-top: 0.5rem;">
                        <div>✅ Script generated</div>
                        <div>🎥 Generating video...</div>
                    </div>
                </div>
                """
                yield progress_html, None, None, None

                # Create temporary directory
                with tempfile.TemporaryDirectory() as tmpdir:
                    # Generate video
                    video_prompt = f"{structure_text}. {script_text}. 1980s commercial, VHS texture, soft lighting, bold retro titles, 4:3, brand {brand_name}"

                    # Calculate optimal frame count
                    num_frames = sync_manager.get_optimal_frame_count(duration_val, DEFAULT_FPS)

                    clip = synth_t2v(
                        prompt=video_prompt,
                        seed=seed_val,
                        num_frames=num_frames,
                        fps=DEFAULT_FPS,
                        device=DEVICE,
                        model_name=video_model
                    )

                    # Save raw video
                    raw_video_path = os.path.join(tmpdir, "raw.mp4")
                    clip.write_videofile(
                        raw_video_path,
                        fps=DEFAULT_FPS,
                        codec='libx264',
                        audio=False,
                        verbose=False,
                        logger=None
                    )

                    # Apply retro filters
                    retro_video_path = os.path.join(tmpdir, "retro.mp4")
                    apply_retro_filters(raw_video_path, retro_video_path, intensity=vhs_intensity)

                    # Update progress
                    progress_html = """
                    <div class="progress-info">
                        <strong>🎬 Generating Commercial...</strong><br>
                        <div style="margin-top: 0.5rem;">
                            <div>✅ Script generated</div>
                            <div>✅ Video generated</div>
                            <div>🎤 Generating audio...</div>
                        </div>
                    </div>
                    """
                    yield progress_html, None, None, None

                    # Generate audio
                    voiceover_text = " ".join([seg.text for seg in generated_script.segments])
                    sr_voice, wav_voice = synth_voice(
                        text=voiceover_text,
                        voice_prompt=voice_style,
                        model_name=audio_model,
                        device=DEVICE
                    )

                    # Add background music if requested
                    if music_enabled:
                        sr_music, wav_music = retro_bed(clip.duration)
                        sr_final, stereo_audio = mix_to_stereo(
                            sr_voice, wav_voice, sr_music, wav_music, bed_gain=0.3
                        )
                    else:
                        sr_final = sr_voice
                        # Duplicate the mono voice track onto both channels
                        stereo_audio = np.stack([wav_voice, wav_voice], axis=1)

                    # Save audio
                    audio_path = os.path.join(tmpdir, "audio.wav")
                    write_wav(audio_path, sr_final, stereo_audio)

                    # Update progress
                    progress_html = """
                    <div class="progress-info">
                        <strong>🎬 Generating Commercial...</strong><br>
                        <div style="margin-top: 0.5rem;">
                            <div>✅ Script generated</div>
                            <div>✅ Video generated</div>
                            <div>✅ Audio generated</div>
                            <div>🔄 Synchronizing audio and video...</div>
                        </div>
                    </div>
                    """
                    yield progress_html, None, None, None

                    # Synchronize audio and video
                    final_video_path = os.path.join(tmpdir, f"{brand_name}_commercial.mp4")
                    sync_manager.synchronize_media(
                        video_path=retro_video_path,
                        audio_path=audio_path,
                        output_path=final_video_path,
                        prefer_audio_duration=True
                    )

                    # Validate sync on the muxed file
                    is_synced, sync_diff = sync_manager.validate_sync(final_video_path, final_video_path)

                    # Format script output
                    script_lines = []
                    for i, segment in enumerate(generated_script.segments, 1):
                        script_lines.append(f"{i}. {segment.timing_marker} {segment.text}")

                    script_output = "\n".join(script_lines) + f"\n\nTAGLINE: {generated_script.tagline}"

                    # Final progress
                    sync_status = "✅ Perfect sync" if is_synced else f"⚠️ Sync diff: {sync_diff:.3f}s"
                    progress_html = f"""
                    <div class="progress-info">
                        <strong>🎉 Commercial Complete!</strong><br>
                        <div style="margin-top: 0.5rem;">
                            <div>✅ Script generated ({generated_script.word_count} words)</div>
                            <div>✅ Video generated ({num_frames} frames)</div>
                            <div>✅ Audio generated ({len(stereo_audio)/sr_final:.1f}s)</div>
                            <div>{sync_status}</div>
                        </div>
                    </div>
                    """

                    yield progress_html, final_video_path, script_output, final_video_path

            except Exception as e:
                logger.error(f"Commercial generation failed: {e}")
                logger.error(f"Traceback: {traceback.format_exc()}")
                error_html = f"""
                <div class="error-info">
                    <strong>❌ Generation Failed</strong><br>
                    <div style="margin-top: 0.5rem; color: #666;">
                        Error: {str(e)}<br>
                        Please try again with different parameters or check the logs.
                    </div>
                </div>
                """
                yield error_html, None, None, None

        # Connect event handlers
        roll_btn.click(
            roll_script_suggestion,
            inputs=[structure, seed],
            outputs=[script_prompt]
        )

        generate_btn.click(
            generate_commercial,
            inputs=[
                brand, structure, script_prompt, duration, voice, music,
                model_video, model_audio, vhs_intensity, seed
            ],
            outputs=[progress_info, output_video, output_script, download_btn]
        )

    return demo

def main():
    """Main application entry point."""
    logger.info("Starting EceMotion Pictures")
    logger.info(f"Using device: {DEVICE}")
    logger.info(f"Video model: {MODEL_VIDEO}")
    logger.info(f"Audio model: {MODEL_AUDIO}")
    logger.info(f"LLM model: {MODEL_LLM}")

    # Create and launch interface
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )

if __name__ == "__main__":
    main()
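When background music is disabled, the handler above widens the mono voice track into stereo by stacking the same samples onto both channels. A minimal self-contained sketch of that numpy step (the exact layout write_wav expects is defined in utils_audio, which this commit also adds; (n_samples, 2) is the frames-by-channels layout common WAV writers such as soundfile use):

import numpy as np

# Mono waveform, shape (n_samples,)
mono = np.array([0.1, -0.2, 0.3], dtype=np.float32)

# Same samples on left and right channels, shape (n_samples, 2)
stereo = np.stack([mono, mono], axis=1)
assert stereo.shape == (3, 2)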
config.py
ADDED
@@ -0,0 +1,190 @@
"""
Configuration management for EceMotion Pictures.
Centralized settings for models, parameters, and deployment.
"""

import os
from typing import Dict, Any, Optional

# Model Configuration - with fallbacks for HuggingFace Spaces
MODEL_VIDEO = os.getenv("MODEL_VIDEO", "damo-vilab/text-to-video-ms-1.7b")  # Start with lighter model
MODEL_AUDIO = os.getenv("MODEL_AUDIO", "parler-tts/parler-tts-mini-v1")  # Start with working model
MODEL_LLM = os.getenv("MODEL_LLM", "microsoft/DialoGPT-medium")  # Start with lighter LLM

# Video Configuration
MAX_DURATION = int(os.getenv("MAX_DURATION", "15"))
MIN_DURATION = int(os.getenv("MIN_DURATION", "5"))
DEFAULT_FPS = int(os.getenv("DEFAULT_FPS", "8"))
DEFAULT_FRAMES = int(os.getenv("DEFAULT_FRAMES", "64"))  # 8 seconds at 8 fps

# Audio Configuration
AUDIO_SAMPLE_RATE = int(os.getenv("AUDIO_SAMPLE_RATE", "22050"))  # Standard rate
AUDIO_BITRATE = os.getenv("AUDIO_BITRATE", "128k")  # Lower bitrate for stability
MUSIC_GAIN = float(os.getenv("MUSIC_GAIN", "0.3"))

# GPU Configuration
GPU_MEMORY_THRESHOLD = float(os.getenv("GPU_MEMORY_THRESHOLD", "0.8"))
USE_QUANTIZATION = os.getenv("USE_QUANTIZATION", "true").lower() == "true"
QUANTIZATION_BITS = int(os.getenv("QUANTIZATION_BITS", "8"))

# Sync Configuration
SYNC_TOLERANCE_MS = int(os.getenv("SYNC_TOLERANCE_MS", "200"))  # More lenient for stability
FORCE_SYNC = os.getenv("FORCE_SYNC", "false").lower() == "true"  # Disabled by default

# Retro Filter Configuration
VHS_INTENSITY = float(os.getenv("VHS_INTENSITY", "0.5"))
SCANLINE_OPACITY = float(os.getenv("SCANLINE_OPACITY", "0.2"))
CHROMATIC_ABERRATION = float(os.getenv("CHROMATIC_ABERRATION", "0.05"))
FILM_GRAIN = float(os.getenv("FILM_GRAIN", "0.1"))

# UI Configuration
UI_THEME = os.getenv("UI_THEME", "default")
SHOW_PROGRESS = os.getenv("SHOW_PROGRESS", "true").lower() == "true"
ENABLE_EXAMPLES = os.getenv("ENABLE_EXAMPLES", "true").lower() == "true"

# Logging Configuration
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
LOG_FORMAT = os.getenv("LOG_FORMAT", "text")  # text format for HuggingFace Spaces

# Model-specific configurations with conservative settings
MODEL_CONFIGS: Dict[str, Dict[str, Any]] = {
    "damo-vilab/text-to-video-ms-1.7b": {
        "max_frames": 64,
        "min_frames": 8,
        "default_frames": 32,
        "memory_usage_gb": 6,
        "supports_quantization": False,
        "stable": True,
    },
    "THUDM/CogVideoX-5b": {
        "max_frames": 48,  # Reduced for stability
        "min_frames": 16,
        "default_frames": 32,
        "memory_usage_gb": 16,  # Conservative estimate
        "supports_quantization": True,
        "stable": False,  # Mark as experimental
    },
    "parler-tts/parler-tts-mini-v1": {
        "max_text_length": 500,
        "min_text_length": 10,
        "default_voice": "Announcer '80s",
        "memory_usage_gb": 2,
        "stable": True,
    },
    "SWivid/F5-TTS": {
        "max_text_length": 300,
        "min_text_length": 10,
        "default_voice": "announcer",
        "memory_usage_gb": 4,
        "stable": False,  # Mark as experimental
    },
    "microsoft/DialoGPT-medium": {
        "max_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "memory_usage_gb": 2,
        "stable": True,
    },
    "Qwen/Qwen2.5-7B-Instruct": {
        "max_tokens": 1024,
        "temperature": 0.7,
        "top_p": 0.9,
        "memory_usage_gb": 8,
        "stable": False,  # Mark as experimental
    },
}

# Voice styles for TTS
VOICE_STYLES = {
    "Announcer '80s": "A confident, upbeat 1980s TV announcer with warm AM-radio tone.",
    "Mall PA": "Casual, slightly echoey mall public-address vibe.",
    "Late Night": "Low energy, sly late-night infomercial style.",
    "News Anchor": "Professional, authoritative news anchor delivery.",
    "Infomercial": "Enthusiastic, persuasive infomercial host style.",
    "Radio DJ": "Smooth, charismatic radio disc jockey voice.",
}

# Structure templates for script generation
STRUCTURE_TEMPLATES = [
    "Montage → Close-up → Logo stinger",
    "Before/After → Feature highlight → CTA",
    "Testimonial → B-roll → Price tag reveal",
    "Unboxing → Demo → Deal countdown",
    "Retro news bulletin → Product shot → Tagline",
    "Opening hook → Problem/Solution → Call to action",
    "Brand story → Product showcase → Final tagline",
]

# Taglines for commercial endings
TAGLINES = [
    "So retro, it's the future.",
    "Pixels you can trust.",
    "VHS vibes. Modern results.",
    "Old-school cool. New-school sales.",
    "Where nostalgia meets innovation.",
    "Rewind to the future.",
    "Classic style. Modern performance.",
    "The past perfected.",
    "EceMotion Pictures - Bringing the '80s back to life.",
    "Your story, our vision, timeless memories.",
]

def get_model_config(model_name: str) -> Dict[str, Any]:
    """Get configuration for a specific model."""
    return MODEL_CONFIGS.get(model_name, {
        "max_frames": 32,
        "min_frames": 8,
        "default_frames": 16,
        "memory_usage_gb": 4,
        "supports_quantization": False,
        "stable": True,
    })

def get_device() -> str:
    """Determine the best available device."""
    try:
        import torch
        # Use CUDA only when a GPU is actually exposed to the process
        if torch.cuda.is_available() and os.getenv("CUDA_VISIBLE_DEVICES", None) not in (None, ""):
            return "cuda"
    except ImportError:
        pass
    return "cpu"

def validate_config() -> bool:
    """Validate configuration settings."""
    try:
        assert MIN_DURATION < MAX_DURATION, "MIN_DURATION must be less than MAX_DURATION"
        assert DEFAULT_FPS > 0, "DEFAULT_FPS must be positive"
        assert AUDIO_SAMPLE_RATE > 0, "AUDIO_SAMPLE_RATE must be positive"
        assert 0 <= VHS_INTENSITY <= 1, "VHS_INTENSITY must be between 0 and 1"
        assert 0 <= SCANLINE_OPACITY <= 1, "SCANLINE_OPACITY must be between 0 and 1"
        return True
    except AssertionError as e:
        print(f"Configuration validation failed: {e}")
        return False

def get_safe_model_name(model_name: str, model_type: str) -> str:
    """Get a safe model name with fallback to stable models."""
    config = get_model_config(model_name)

    # If the model is not stable, fall back to stable alternatives
    if not config.get("stable", False):
        if model_type == "video":
            return "damo-vilab/text-to-video-ms-1.7b"
        elif model_type == "audio":
            return "parler-tts/parler-tts-mini-v1"
        elif model_type == "llm":
            return "microsoft/DialoGPT-medium"

    return model_name

def log_config():
    """Log current configuration for debugging."""
    print(f"EceMotion Pictures Configuration:")
    print(f"  Video Model: {MODEL_VIDEO}")
    print(f"  Audio Model: {MODEL_AUDIO}")
    print(f"  LLM Model: {MODEL_LLM}")
    print(f"  Device: {get_device()}")
    print(f"  Duration Range: {MIN_DURATION}-{MAX_DURATION}s")
    print(f"  FPS: {DEFAULT_FPS}")
    print(f"  Sync Tolerance: {SYNC_TOLERANCE_MS}ms")
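Every setting above is read from the environment exactly once, at import time. A minimal usage sketch, assuming a fresh interpreter (set the variables before the first import of config):

import os
os.environ["MODEL_VIDEO"] = "THUDM/CogVideoX-5b"  # experimental model
os.environ["MAX_DURATION"] = "12"

import config  # picks up the overrides above

print(config.MODEL_VIDEO)   # THUDM/CogVideoX-5b
# CogVideoX-5b is marked stable=False, so the safety helper
# falls back to the lighter default:
print(config.get_safe_model_name(config.MODEL_VIDEO, "video"))
# -> damo-vilab/text-to-video-ms-1.7b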
llm_script_generator.py
ADDED
@@ -0,0 +1,388 @@
"""
LLM-powered script generation for EceMotion Pictures.
Generates intelligent, structure-aware commercial scripts with timing markers.
"""

import logging
import random
import re
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass

from config import (
    MODEL_LLM, MODEL_CONFIGS, VOICE_STYLES, STRUCTURE_TEMPLATES, TAGLINES,
    get_safe_model_name
)

logger = logging.getLogger(__name__)

@dataclass
class ScriptSegment:
    """Represents a segment of the commercial script with timing information."""
    text: str
    duration_estimate: float
    segment_type: str  # "hook", "flow", "benefit", "cta"
    timing_marker: Optional[str] = None

@dataclass
class GeneratedScript:
    """Complete generated script with all segments and metadata."""
    segments: List[ScriptSegment]
    total_duration: float
    tagline: str
    voice_style: str
    word_count: int
    raw_script: str

class LLMScriptGenerator:
    """Generates commercial scripts using large language models with fallbacks."""

    def __init__(self, model_name: str = MODEL_LLM):
        self.model_name = get_safe_model_name(model_name, "llm")
        self.model = None
        self.tokenizer = None
        self.model_config = MODEL_CONFIGS.get(self.model_name, {})
        self.llm_available = False

        # Try to initialize LLM
        self._try_init_llm()

    def _try_init_llm(self):
        """Try to initialize the LLM model."""
        try:
            if "dialo" in self.model_name.lower():
                self._init_dialogpt()
            elif "qwen" in self.model_name.lower():
                self._init_qwen()
            else:
                logger.warning(f"Unknown LLM model: {self.model_name}, using fallback")
                self.llm_available = False
        except Exception as e:
            logger.warning(f"Failed to initialize LLM {self.model_name}: {e}")
            self.llm_available = False

    def _init_dialogpt(self):
        """Initialize DialoGPT model."""
        try:
            from transformers import AutoTokenizer, AutoModelForCausalLM

            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype="auto",
                device_map="auto" if self._has_gpu() else "cpu"
            )
            self.llm_available = True
            logger.info(f"DialoGPT model {self.model_name} loaded successfully")

        except Exception as e:
            logger.error(f"Failed to load DialoGPT: {e}")
            self.llm_available = False

    def _init_qwen(self):
        """Initialize Qwen model."""
        try:
            from transformers import AutoTokenizer, AutoModelForCausalLM

            self.tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True
            )

            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype="auto",
                device_map="auto" if self._has_gpu() else "cpu",
                trust_remote_code=True
            )
            self.llm_available = True
            logger.info(f"Qwen model {self.model_name} loaded successfully")

        except Exception as e:
            logger.error(f"Failed to load Qwen: {e}")
            self.llm_available = False

    def _has_gpu(self) -> bool:
        """Check if GPU is available."""
        try:
            import torch
            return torch.cuda.is_available()
        except ImportError:
            return False

    def _create_system_prompt(self) -> str:
        """Create system prompt for retro commercial script generation."""
        return """You are a professional copywriter specializing in 1980s-style TV commercials.
Your task is to create engaging, persuasive commercial scripts that capture the authentic retro aesthetic.

Key requirements:
- Use 1980s commercial language and style
- Include clear hooks, benefits, and calls-to-action
- Keep scripts concise and punchy
- Use active voice and emotional appeals
- End with a memorable tagline

Format your response as:
HOOK: [Opening attention-grabber]
FLOW: [Main content following the structure]
BENEFIT: [Key value proposition]
CTA: [Call to action with tagline]

Keep each segment under 2-3 sentences. Use enthusiastic, confident language typical of 1980s advertising."""

    def _create_user_prompt(self, brand: str, structure: str, script_prompt: str,
                            duration: int, voice_style: str) -> str:
        """Create user prompt with specific requirements."""
        return f"""Create a {duration}-second retro commercial script for {brand}.

Structure: {structure}
Script idea: {script_prompt}
Voice style: {voice_style}

Make it authentic to 1980s TV commercials with the energy and style of that era."""

    def _parse_script_response(self, response: str) -> List[ScriptSegment]:
        """Parse LLM response into structured script segments."""
        segments = []

        # Split by segment markers
        parts = re.split(r'(HOOK:|FLOW:|BENEFIT:|CTA:)', response)

        for i in range(1, len(parts), 2):
            if i + 1 < len(parts):
                segment_type = parts[i].rstrip(':').lower()
                text = parts[i + 1].strip()

                if text:
                    # Estimate duration based on word count (150 WPM)
                    word_count = len(text.split())
                    duration = (word_count / 150) * 60  # Convert to seconds

                    segments.append(ScriptSegment(
                        text=text,
                        duration_estimate=duration,
                        segment_type=segment_type,
                        timing_marker=f"[{segment_type.upper()}]"
                    ))

        return segments

    def _extract_tagline(self, response: str) -> str:
        """Extract tagline from the script response."""
        # Look for tagline in CTA section
        cta_match = re.search(r'CTA:.*?([A-Z][^.!?]*[.!?])', response, re.DOTALL)
        if cta_match:
            cta_text = cta_match.group(1)
            # Extract the last sentence as potential tagline
            sentences = re.split(r'[.!?]+', cta_text)
            if sentences:
                tagline = sentences[-1].strip()
                if len(tagline) > 5:  # Ensure it's substantial
                    return tagline

        # Fall back to predefined taglines
        return random.choice(TAGLINES)

    def generate_script_with_llm(self, brand: str, structure: str, script_prompt: str,
                                 duration: int, voice_style: str, seed: int = 42) -> GeneratedScript:
        """Generate script using LLM."""
        if not self.llm_available:
            raise RuntimeError("LLM not available")

        import torch

        # Set random seed for reproducibility
        random.seed(seed)

        # Create prompts
        system_prompt = self._create_system_prompt()
        user_prompt = self._create_user_prompt(brand, structure, script_prompt, duration, voice_style)

        # Format for the model
        if "dialo" in self.model_name.lower():
            # DialoGPT format
            text = f"{user_prompt}\n\nResponse:"
        else:
            # Generic format
            text = f"System: {system_prompt}\n\nUser: {user_prompt}\n\nAssistant:"

        # Tokenize
        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)

        # Generate (eval() is not a context manager; use no_grad for inference)
        self.model.eval()
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=self.model_config.get("max_tokens", 256),
                temperature=self.model_config.get("temperature", 0.7),
                top_p=self.model_config.get("top_p", 0.9),
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
                num_return_sequences=1
            )

        # Decode response
        response = self.tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

        logger.info(f"Generated script response: {response[:200]}...")

        # Parse response
        segments = self._parse_script_response(response)
        tagline = self._extract_tagline(response)

        # Calculate total duration
        total_duration = sum(segment.duration_estimate for segment in segments)

        # Calculate word count
        word_count = sum(len(segment.text.split()) for segment in segments)

        return GeneratedScript(
            segments=segments,
            total_duration=total_duration,
            tagline=tagline,
            voice_style=voice_style,
            word_count=word_count,
            raw_script=response
        )

    def generate_script_with_template(self, brand: str, structure: str, script_prompt: str,
                                      duration: int, voice_style: str, seed: int = 42) -> GeneratedScript:
        """Generate script using template-based approach (fallback)."""
        random.seed(seed)

        # Select structure template
        structure_template = structure.strip() or random.choice(STRUCTURE_TEMPLATES)

        # Generate segments based on template
        segments = []

        # Hook
        hook_text = script_prompt or f"Introducing {brand} - the future is here!"
        segments.append(ScriptSegment(
            text=hook_text,
            duration_estimate=2.0,
            segment_type="hook",
            timing_marker="[HOOK]"
        ))

        # Flow (based on structure)
        flow_text = f"With {structure_template.lower()}, {brand} delivers results like never before."
        segments.append(ScriptSegment(
            text=flow_text,
            duration_estimate=3.0,
            segment_type="flow",
            timing_marker="[FLOW]"
        ))

        # Benefit
        benefit_text = "Faster, simpler, cooler - just like your favorite retro tech."
        segments.append(ScriptSegment(
            text=benefit_text,
            duration_estimate=2.5,
            segment_type="benefit",
            timing_marker="[BENEFIT]"
        ))

        # CTA
        tagline = random.choice(TAGLINES)
        cta_text = f"Try {brand} today. {tagline}"
        segments.append(ScriptSegment(
            text=cta_text,
            duration_estimate=2.5,
            segment_type="cta",
            timing_marker="[CTA]"
        ))

        # Calculate totals
        total_duration = sum(segment.duration_estimate for segment in segments)
        word_count = sum(len(segment.text.split()) for segment in segments)

        return GeneratedScript(
            segments=segments,
            total_duration=total_duration,
            tagline=tagline,
            voice_style=voice_style,
            word_count=word_count,
            raw_script=f"Template-based script for {brand}"
        )

    def generate_script(self, brand: str, structure: str, script_prompt: str,
                        duration: int, voice_style: str, seed: int = 42) -> GeneratedScript:
        """Generate a complete commercial script."""
        try:
            if self.llm_available:
                return self.generate_script_with_llm(brand, structure, script_prompt, duration, voice_style, seed)
            else:
                logger.info("Using template-based script generation (LLM not available)")
                return self.generate_script_with_template(brand, structure, script_prompt, duration, voice_style, seed)
        except Exception as e:
            logger.error(f"Script generation failed: {e}")
            logger.info("Falling back to template-based generation")
            return self.generate_script_with_template(brand, structure, script_prompt, duration, voice_style, seed)

    def suggest_scripts(self, structure: str, n: int = 6, seed: int = 0) -> List[str]:
        """Generate multiple script suggestions based on structure."""
        try:
            suggestions = []
            for i in range(n):
                script = self.generate_script(
                    brand="YourBrand",
                    structure=structure,
                    script_prompt="Create an engaging hook",
                    duration=10,
                    voice_style="Announcer '80s",
                    seed=seed + i
                )

                # Extract hook from first segment
                if script.segments:
                    hook = script.segments[0].text
                    suggestions.append(hook)
                else:
                    suggestions.append("Back to '87 - the future is now!")

            return suggestions

        except Exception as e:
            logger.warning(f"Script suggestion failed: {e}")
            # Fall back to the original random generation
            return self._fallback_suggestions(structure, n, seed)

    def _fallback_suggestions(self, structure: str, n: int, seed: int) -> List[str]:
        """Fallback to original random script generation."""
        random.seed(seed)

        base = (structure or "").lower().strip()
        ideas = []

        for _ in range(n):
            style = random.choice(["infomercial", "mall ad", "late-night", "newsflash", "arcade bumper"])
            shot = random.choice(["neon grid", "CRT scanlines", "vaporwave sunset", "shopping mall", "boombox close-up"])
            hook = random.choice([
                "Remember this sound?", "Back to '87.", "Deal of the decade.",
                "We paused time.", "Be kind, rewind your brand."
            ])
            idea = f"{hook} {style} with {shot}."

            # Light correlation with structure
            for kw in ["montage", "testimonial", "news", "unboxing", "before", "after", "countdown", "logo", "cta"]:
                if kw in base and kw not in idea:
                    idea += f" Includes {kw}."

            ideas.append(idea)

        return ideas

def create_script_generator() -> LLMScriptGenerator:
    """Factory function to create a script generator."""
    return LLMScriptGenerator()
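A short usage sketch of the generator defined above; when no transformers model can be loaded it silently takes the template path, so this also runs on CPU-only machines:

from llm_script_generator import create_script_generator

gen = create_script_generator()
script = gen.generate_script(
    brand="EceMotion Pictures",
    structure="Montage → Close-up → Logo stinger",
    script_prompt="Remember when technology was simple?",
    duration=10,
    voice_style="Announcer '80s",
    seed=42,
)
for seg in script.segments:
    print(seg.timing_marker, seg.text, f"(~{seg.duration_estimate:.1f}s)")
print("TAGLINE:", script.tagline)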
promptkit.py
ADDED
@@ -0,0 +1,81 @@
"""
Legacy promptkit module for EceMotion Pictures.
Maintained for backward compatibility.
"""

from dataclasses import dataclass
from typing import Dict, List
import random

TAGLINES = [
    "So retro, it's the future.",
    "Pixels you can trust.",
    "VHS vibes. Modern results.",
    "Old-school cool. New-school sales.",
    "EceMotion Pictures - Bringing the '80s back to life.",
    "Your story, our vision, timeless memories.",
]

VOICE_STYLES = {
    "Announcer '80s": "A confident, upbeat 1980s TV announcer with warm AM-radio tone.",
    "Mall PA": "Casual, slightly echoey mall public-address vibe.",
    "Late Night": "Low energy, sly late-night infomercial style.",
    "News Anchor": "Professional, authoritative news anchor delivery.",
    "Infomercial": "Enthusiastic, persuasive infomercial host style.",
    "Radio DJ": "Smooth, charismatic radio disc jockey voice.",
}

STRUCTURE_TEMPLATES = [
    "Montage → Close-up → Logo stinger",
    "Before/After → Feature highlight → CTA",
    "Testimonial → B-roll → Price tag reveal",
    "Unboxing → Demo → Deal countdown",
    "Retro news bulletin → Product shot → Tagline",
    "Opening hook → Problem/Solution → Call to action",
    "Brand story → Product showcase → Final tagline",
]

@dataclass
class AdPlan:
    brand: str
    structure: str
    script_prompt: str
    duration: int
    voice_style: str
    seed: int

    def script(self) -> Dict[str, str]:
        random.seed(self.seed)
        tl = random.choice(TAGLINES)
        structure = self.structure.strip() or random.choice(STRUCTURE_TEMPLATES)
        # 4-beat VO using structure + script prompt
        beats = [
            f"HOOK: {self.brand} — {self.script_prompt}",
            f"FLOW: {structure}",
            "BENEFIT: Faster, simpler, cooler — like your favorite retro tech.",
            f"CTA: Try {self.brand} today. {tl}",
        ]
        vo = " ".join([b.split(': ', 1)[1] for b in beats])
        return {"lines": beats, "voiceover": vo, "tagline": tl}

def suggest_scripts(structure_prompt: str, n: int = 6, seed: int = 0) -> List[str]:
    """Return n short script ideas correlated with the structure prompt."""
    random.seed(seed)
    base = (structure_prompt or "").lower().strip()
    ideas = []
    for _ in range(n):
        style = random.choice(["infomercial", "mall ad", "late-night", "newsflash", "arcade bumper"])
        shot = random.choice(["neon grid", "CRT scanlines", "vaporwave sunset", "shopping mall", "boombox close-up"])
        hook = random.choice([
            "Remember this sound?", "Back to '87.", "Deal of the decade.", "We paused time.", "Be kind, rewind your brand.",
        ])
        idea = f"{hook} {style} with {shot}."
        # Light correlation: echo key words from structure prompt
        for kw in ["montage", "testimonial", "news", "unboxing", "before", "after", "countdown", "logo", "cta"]:
            if kw in base and kw not in idea:
                idea += f" Includes {kw}."
        ideas.append(idea)
    return ideas

def roll_script(structure_prompt: str, seed: int = 0) -> str:
    return random.choice(suggest_scripts(structure_prompt, n=6, seed=seed))
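For reference, the legacy API above is driven like this (values here are illustrative):

from promptkit import AdPlan, roll_script

plan = AdPlan(
    brand="EceMotion Pictures",
    structure="Unboxing → Demo → Deal countdown",
    script_prompt="Back to '87.",
    duration=10,
    voice_style="Announcer '80s",
    seed=7,
)
result = plan.script()
print(result["voiceover"])  # flat VO string, ready for TTS
print(result["tagline"])

# One random hook suggestion correlated with a structure prompt
print(roll_script("montage with logo stinger", seed=3))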
requirements.txt
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# EceMotion Pictures - Production Requirements
# Tested and verified versions for HuggingFace Spaces

# Core ML/AI libraries - stable versions
gradio==4.44.0
transformers==4.44.2
accelerate==0.34.0
diffusers==0.31.0
safetensors==0.4.3
sentencepiece==0.2.0
huggingface_hub==0.24.6

# PyTorch ecosystem - stable versions
torch==2.4.0
torchvision==0.19.0
torchaudio==2.4.0

# Video processing - stable versions
moviepy==1.0.3
imageio[ffmpeg]==2.34.0
ffmpeg-python==0.2.0

# Audio processing - stable versions
soundfile==0.12.1
librosa==0.10.2
scipy==1.11.4

# Data processing
numpy==1.26.4
pandas==2.2.0

# Configuration and validation
pydantic==2.8.0
python-dotenv==1.0.0

# Logging and monitoring
loguru==0.7.2

# Additional dependencies for HuggingFace Spaces
Pillow==10.2.0
opencv-python==4.9.0.80
matplotlib==3.8.4
seaborn==0.13.2

# Development and testing (optional)
pytest==8.0.0
black==24.0.0
flake8==7.0.0

# System dependencies (for HuggingFace Spaces)
# These are typically pre-installed but listed for completeness
# ffmpeg (system package)
# git (system package)
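
A quick import sanity check for the pins above; a sketch to run after pip install -r requirements.txt, not itself part of the commit:

# Confirm the core stack resolves and report CUDA visibility.
import torch, gradio, transformers, diffusers

print("torch", torch.__version__, "| cuda:", torch.cuda.is_available())
print("gradio", gradio.__version__)
print("transformers", transformers.__version__, "| diffusers", diffusers.__version__)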
sync_manager.py
ADDED
@@ -0,0 +1,381 @@
"""
Audio-Video Synchronization Manager for EceMotion Pictures.
Ensures frame-perfect alignment between generated audio and video content.
"""

import os
import tempfile
import subprocess
import numpy as np
import logging
from typing import Tuple, Optional, Dict, Any
from pathlib import Path
import shutil

from config import SYNC_TOLERANCE_MS, FORCE_SYNC, AUDIO_SAMPLE_RATE

logger = logging.getLogger(__name__)

class SyncManager:
    """Manages audio-video synchronization with frame-perfect accuracy."""

    def __init__(self, tolerance_ms: int = SYNC_TOLERANCE_MS):
        self.tolerance_ms = tolerance_ms
        self.tolerance_s = tolerance_ms / 1000.0
        self.ffmpeg_available = self._check_ffmpeg()

    def _check_ffmpeg(self) -> bool:
        """Check if ffmpeg is available."""
        try:
            subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
            return True
        except (subprocess.CalledProcessError, FileNotFoundError):
            logger.warning("ffmpeg not found, using fallback methods")
            return False

    def calculate_video_duration(self, num_frames: int, fps: float) -> float:
        """Calculate exact video duration from frame count and FPS."""
        return num_frames / fps

    def measure_audio_duration(self, audio_path: str) -> float:
        """Measure the actual duration of an audio file."""
        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        if self.ffmpeg_available:
            return self._measure_with_ffmpeg(audio_path)
        else:
            return self._measure_with_soundfile(audio_path)

    def _measure_with_ffmpeg(self, audio_path: str) -> float:
        """Measure duration using ffprobe."""
        try:
            cmd = [
                'ffprobe', '-v', 'quiet', '-show_entries', 'format=duration',
                '-of', 'csv=p=0', audio_path
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            duration = float(result.stdout.strip())
            logger.info(f"Audio duration (ffmpeg): {duration:.3f}s")
            return duration
        except (subprocess.CalledProcessError, ValueError) as e:
            logger.error(f"Failed to measure audio duration with ffmpeg: {e}")
            return self._measure_with_soundfile(audio_path)

    def _measure_with_soundfile(self, audio_path: str) -> float:
        """Measure duration using soundfile as a fallback."""
        try:
            import soundfile as sf
            info = sf.info(audio_path)
            duration = info.duration
            logger.info(f"Audio duration (soundfile): {duration:.3f}s")
            return duration
        except Exception as e:
            logger.error(f"Failed to measure audio duration with soundfile: {e}")
            # Last resort: estimate from file size
            return self._estimate_duration_from_size(audio_path)

    def _estimate_duration_from_size(self, audio_path: str) -> float:
        """Estimate duration from file size (very rough heuristic)."""
        try:
            file_size = os.path.getsize(audio_path)
            # Very rough heuristic: assume ~1 MB per second of audio
            estimated_duration = file_size / (1024 * 1024)
            logger.warning(f"Estimated audio duration from file size: {estimated_duration:.3f}s")
            return estimated_duration
        except Exception as e:
            logger.error(f"Failed to estimate duration: {e}")
            return 10.0  # Default fallback

    def measure_video_duration(self, video_path: str) -> float:
        """Measure the actual duration of a video file."""
        if not os.path.exists(video_path):
            raise FileNotFoundError(f"Video file not found: {video_path}")

        if self.ffmpeg_available:
            return self._measure_video_with_ffmpeg(video_path)
        else:
            return self._estimate_video_duration(video_path)

    def _measure_video_with_ffmpeg(self, video_path: str) -> float:
        """Measure video duration using ffprobe."""
        try:
            cmd = [
                'ffprobe', '-v', 'quiet', '-show_entries', 'format=duration',
                '-of', 'csv=p=0', video_path
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            duration = float(result.stdout.strip())
            logger.info(f"Video duration (ffmpeg): {duration:.3f}s")
            return duration
        except (subprocess.CalledProcessError, ValueError) as e:
            logger.error(f"Failed to measure video duration with ffmpeg: {e}")
            return self._estimate_video_duration(video_path)

    def _estimate_video_duration(self, video_path: str) -> float:
        """Estimate video duration (fallback method)."""
        try:
            # Try to get the duration from the filename, or use a default
            filename = os.path.basename(video_path)
            if '_' in filename:
                # Try to extract duration from a filename like "video_10s.mp4"
                parts = filename.split('_')
                for part in parts:
                    if 's' in part:
                        try:
                            duration = float(part.replace('s', ''))
                            logger.info(f"Estimated video duration from filename: {duration:.3f}s")
                            return duration
                        except ValueError:
                            continue

            # Default fallback
            logger.warning("Using default video duration estimate: 10.0s")
            return 10.0
        except Exception as e:
            logger.error(f"Failed to estimate video duration: {e}")
            return 10.0

    def adjust_audio_to_video(self, audio_path: str, target_duration: float,
                              output_path: str) -> str:
        """Adjust audio duration to match video duration."""
        if self.ffmpeg_available:
            return self._adjust_audio_with_ffmpeg(audio_path, target_duration, output_path)
        else:
            return self._adjust_audio_with_soundfile(audio_path, target_duration, output_path)

    def _adjust_audio_with_ffmpeg(self, audio_path: str, target_duration: float,
                                  output_path: str) -> str:
        """Adjust audio using ffmpeg (pad with silence, then trim to length)."""
        try:
            cmd = [
                'ffmpeg', '-i', audio_path, '-t', str(target_duration),
                '-af', 'apad', '-c:a', 'pcm_s16le', '-y', output_path
            ]
            subprocess.run(cmd, check=True, capture_output=True)
            logger.info(f"Adjusted audio to {target_duration:.3f}s using ffmpeg")
            return output_path
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to adjust audio with ffmpeg: {e}")
            return self._adjust_audio_with_soundfile(audio_path, target_duration, output_path)

    def _adjust_audio_with_soundfile(self, audio_path: str, target_duration: float,
                                     output_path: str) -> str:
        """Adjust audio using soundfile (fallback)."""
        try:
            import soundfile as sf

            # Read audio
            audio_data, sample_rate = sf.read(audio_path)

            # Calculate target samples
            target_samples = int(target_duration * sample_rate)

            if len(audio_data) < target_samples:
                # Pad with silence
                padding = np.zeros(target_samples - len(audio_data))
                if len(audio_data.shape) > 1:  # Stereo
                    padding = np.zeros((target_samples - len(audio_data), audio_data.shape[1]))
                adjusted_audio = np.concatenate([audio_data, padding])
            else:
                # Trim to target length
                adjusted_audio = audio_data[:target_samples]

            # Write adjusted audio
            sf.write(output_path, adjusted_audio, sample_rate)
            logger.info(f"Adjusted audio to {target_duration:.3f}s using soundfile")
            return output_path

        except Exception as e:
            logger.error(f"Failed to adjust audio with soundfile: {e}")
            # Last resort: just copy the file
            shutil.copy2(audio_path, output_path)
            return output_path

    def adjust_video_to_audio(self, video_path: str, target_duration: float,
                              output_path: str) -> str:
        """Adjust video duration to match audio duration."""
        if self.ffmpeg_available:
            return self._adjust_video_with_ffmpeg(video_path, target_duration, output_path)
        else:
            # Without ffmpeg we cannot easily retime video, so just copy
            shutil.copy2(video_path, output_path)
            return output_path

    def _adjust_video_with_ffmpeg(self, video_path: str, target_duration: float,
                                  output_path: str) -> str:
        """Adjust video using ffmpeg."""
        try:
            cmd = [
                'ffmpeg', '-i', video_path, '-t', str(target_duration),
                '-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-y', output_path
            ]
            subprocess.run(cmd, check=True, capture_output=True)
            logger.info(f"Adjusted video to {target_duration:.3f}s using ffmpeg")
            return output_path
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to adjust video with ffmpeg: {e}")
            # Fallback: just copy
            shutil.copy2(video_path, output_path)
            return output_path

    def validate_sync(self, video_path: str, audio_path: str) -> Tuple[bool, float]:
        """Validate that audio and video durations agree within tolerance."""
        try:
            video_duration = self.measure_video_duration(video_path)
            audio_duration = self.measure_audio_duration(audio_path)

            duration_diff = abs(video_duration - audio_duration)
            is_synced = duration_diff <= self.tolerance_s

            logger.info(f"Sync validation: video={video_duration:.3f}s, "
                        f"audio={audio_duration:.3f}s, diff={duration_diff:.3f}s, "
                        f"synced={is_synced}")

            return is_synced, duration_diff

        except Exception as e:
            logger.error(f"Sync validation failed: {e}")
            return False, float('inf')

    def synchronize_media(self, video_path: str, audio_path: str,
                          output_path: str, prefer_audio_duration: bool = True) -> str:
        """
        Synchronize audio and video with frame-perfect accuracy.
        """
        try:
            # Measure durations
            video_duration = self.measure_video_duration(video_path)
            audio_duration = self.measure_audio_duration(audio_path)

            duration_diff = abs(video_duration - audio_duration)

            # Check if already synchronized
            if duration_diff <= self.tolerance_s:
                logger.info("Media already synchronized, copying to output")
                self._copy_media(video_path, audio_path, output_path)
                return output_path

            # Determine target duration
            if prefer_audio_duration:
                target_duration = audio_duration
                logger.info(f"Adjusting video to match audio duration: {target_duration:.3f}s")
            else:
                target_duration = video_duration
                logger.info(f"Adjusting audio to match video duration: {target_duration:.3f}s")

            # Create temporary files for adjustments
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_video = os.path.join(temp_dir, "temp_video.mp4")
                temp_audio = os.path.join(temp_dir, "temp_audio.wav")

                # Adjust durations
                if prefer_audio_duration:
                    self.adjust_video_to_audio(video_path, target_duration, temp_video)
                    temp_audio = audio_path  # Use original audio
                else:
                    self.adjust_audio_to_video(audio_path, target_duration, temp_audio)
                    temp_video = video_path  # Use original video

                # Mux synchronized media
                self._mux_media(temp_video, temp_audio, output_path)

                # Validate the adjusted sources that were muxed. (Probing the
                # muxed file against itself would compare the same container
                # duration twice and always report perfect sync.)
                is_synced, final_diff = self.validate_sync(temp_video, temp_audio)
                if not is_synced and FORCE_SYNC:
                    logger.warning(f"Final sync validation failed with diff {final_diff:.3f}s")
                else:
                    logger.info("Media successfully synchronized")

            return output_path

        except Exception as e:
            logger.error(f"Synchronization failed: {e}")
            # Fallback: just copy video without audio
            shutil.copy2(video_path, output_path)
            return output_path

    def _copy_media(self, video_path: str, audio_path: str, output_path: str):
        """Copy and mux media without duration adjustment."""
        self._mux_media(video_path, audio_path, output_path)

    def _mux_media(self, video_path: str, audio_path: str, output_path: str):
        """Mux video and audio with precise timing."""
        if self.ffmpeg_available:
            self._mux_with_ffmpeg(video_path, audio_path, output_path)
        else:
            self._mux_with_moviepy(video_path, audio_path, output_path)

    def _mux_with_ffmpeg(self, video_path: str, audio_path: str, output_path: str):
        """Mux using ffmpeg."""
        try:
            cmd = [
                'ffmpeg', '-i', video_path, '-i', audio_path,
                '-c:v', 'copy', '-c:a', 'aac', '-b:a', '128k',
                '-shortest', '-fflags', '+shortest',
                '-movflags', '+faststart', '-y', output_path
            ]
            subprocess.run(cmd, check=True, capture_output=True)
            logger.info("Media successfully muxed with ffmpeg")
        except subprocess.CalledProcessError as e:
            logger.error(f"Media muxing with ffmpeg failed: {e}")
            self._mux_with_moviepy(video_path, audio_path, output_path)

    def _mux_with_moviepy(self, video_path: str, audio_path: str, output_path: str):
        """Mux using moviepy (fallback)."""
        try:
            from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips

            # Load video and audio
            video = VideoFileClip(video_path)
            audio = AudioFileClip(audio_path)

            # Set audio duration to match video
            if audio.duration > video.duration:
                audio = audio.subclip(0, video.duration)
            elif audio.duration < video.duration:
                # Pad audio with silence (fps is required so the silence can
                # be rendered; concatenation is a module-level function, not a
                # method on AudioFileClip)
                from moviepy.audio.AudioClip import AudioClip
                silence = AudioClip(lambda t: 0, duration=video.duration - audio.duration,
                                    fps=audio.fps)
                audio = concatenate_audioclips([audio, silence])

            # Combine and write
            final_video = video.set_audio(audio)
            final_video.write_videofile(
                output_path,
                codec='libx264',
                audio_codec='aac',
                temp_audiofile='temp-audio.m4a',
                remove_temp=True,
                verbose=False,
                logger=None
            )

            # Clean up
            video.close()
            audio.close()
            final_video.close()

            logger.info("Media successfully muxed with moviepy")

        except Exception as e:
            logger.error(f"Media muxing with moviepy failed: {e}")
            # Last resort: just copy video
            shutil.copy2(video_path, output_path)

    def get_optimal_frame_count(self, target_duration: float, fps: float) -> int:
        """Calculate the optimal frame count for a target duration."""
        frame_count = int(target_duration * fps)
        # Keep the frame count in a reasonable range (1-8 seconds at 8 fps)
        frame_count = max(8, min(frame_count, 64))
        return frame_count

    def estimate_audio_duration(self, text: str, words_per_minute: int = 150) -> float:
        """Estimate audio duration from text length."""
        word_count = len(text.split())
        duration_minutes = word_count / words_per_minute
        return duration_minutes * 60.0  # Convert to seconds

def create_sync_manager() -> SyncManager:
    """Factory function to create a SyncManager instance."""
    return SyncManager()
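
A minimal sketch of the intended SyncManager call pattern (the file paths here are hypothetical):

from sync_manager import create_sync_manager

sm = create_sync_manager()
# 4 s at 8 fps -> 32 frames, clamped to the 8..64 range enforced above.
n_frames = sm.get_optimal_frame_count(target_duration=4.0, fps=8.0)
# Retime the video to the voiceover length, then mux into the final file.
final = sm.synchronize_media("raw_video.mp4", "voiceover.wav", "final_ad.mp4",
                             prefer_audio_duration=True)
ok, diff = sm.validate_sync("raw_video.mp4", "voiceover.wav")
print(n_frames, final, ok, f"{diff:.3f}s")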
utils_audio.py
ADDED
@@ -0,0 +1,292 @@
"""
Audio processing utilities for EceMotion Pictures.
Enhanced text-to-speech generation with robust error handling and fallbacks.
"""

import numpy as np
import logging
import os
from typing import Tuple, Optional, Dict, Any

from config import (
    MODEL_AUDIO, MODEL_CONFIGS, AUDIO_SAMPLE_RATE, get_device, get_safe_model_name
)

logger = logging.getLogger(__name__)

# Global model cache
_tts_pipe = None
_current_tts_model = None

def get_tts_pipe(model_name: str = MODEL_AUDIO, device: str = None):
    """Get or create the TTS pipeline with lazy loading and model switching."""
    global _tts_pipe, _current_tts_model

    if device is None:
        device = get_device()

    # Use safe model name
    safe_model_name = get_safe_model_name(model_name, "audio")

    if _tts_pipe is None or _current_tts_model != safe_model_name:
        logger.info(f"Loading TTS model: {safe_model_name}")

        try:
            if "f5-tts" in safe_model_name.lower():
                # Try F5-TTS first
                _tts_pipe = _load_f5_tts(safe_model_name, device)
            else:
                # Use the standard TTS pipeline
                _tts_pipe = _load_standard_tts(safe_model_name, device)

            if _tts_pipe is not None:
                _current_tts_model = safe_model_name
                logger.info(f"TTS model {safe_model_name} loaded successfully")
            else:
                raise RuntimeError("Failed to load any TTS model")

        except Exception as e:
            logger.error(f"Failed to load {safe_model_name}: {e}")
            # Fall back to the default model
            _tts_pipe = _load_standard_tts("parler-tts/parler-tts-mini-v1", device)
            _current_tts_model = "parler-tts/parler-tts-mini-v1"

    return _tts_pipe

def _load_f5_tts(model_name: str, device: str):
    """Load an F5-TTS model."""
    try:
        from transformers import pipeline

        pipe = pipeline(
            "text-to-speech",
            model=model_name,
            torch_dtype="auto",
            device_map=device if device == "cuda" else None
        )

        return pipe

    except Exception as e:
        logger.error(f"Failed to load F5-TTS: {e}")
        return None

def _load_standard_tts(model_name: str, device: str):
    """Load a standard TTS model."""
    try:
        from transformers import pipeline

        # transformers pipelines take the device at construction time; they
        # do not expose a .to() method the way raw models do.
        pipe = pipeline(
            "text-to-speech",
            model=model_name,
            torch_dtype="auto",
            device=0 if device == "cuda" else -1
        )

        return pipe

    except Exception as e:
        logger.error(f"Failed to load standard TTS: {e}")
        return None

def synth_voice(text: str, voice_prompt: str, sr: int = AUDIO_SAMPLE_RATE,
                model_name: str = MODEL_AUDIO, device: str = None) -> Tuple[int, np.ndarray]:
    """
    Generate speech from text with enhanced TTS support.
    """
    if device is None:
        device = get_device()

    tts = get_tts_pipe(model_name, device)
    model_config = MODEL_CONFIGS.get(_current_tts_model, {})

    # Validate text length
    max_length = model_config.get("max_text_length", 500)
    min_length = model_config.get("min_text_length", 10)

    if len(text) > max_length:
        logger.warning(f"Text too long ({len(text)} chars), truncating to {max_length}")
        text = text[:max_length]
    elif len(text) < min_length:
        logger.warning(f"Text too short ({len(text)} chars), padding")
        text = text + " " * (min_length - len(text))

    try:
        if "f5-tts" in _current_tts_model.lower():
            # F5-TTS specific generation
            result = tts(
                text=text,
                voice_preset=voice_prompt,
                return_tensors="pt"
            )
            wav = result["audio"].numpy().flatten()
        else:
            # Standard pipeline (Parler-TTS, etc.). Prompt-parameter names vary
            # by model, which is why this call is guarded by the except below.
            result = tts({"text": text, "voice_preset": voice_prompt})
            wav = result["audio"]

        # Ensure proper format
        if hasattr(wav, 'numpy'):
            wav = wav.numpy()
        elif hasattr(wav, 'detach'):
            wav = wav.detach().numpy()

        # Normalize audio
        wav = normalize_audio(wav)

        # Resample if needed, using the model's reported rate when available
        src_sr = result.get("sampling_rate", AUDIO_SAMPLE_RATE) if isinstance(result, dict) else AUDIO_SAMPLE_RATE
        if sr != src_sr:
            wav = _resample_audio(wav, src_sr, sr)

        logger.info(f"Generated audio: {len(wav)/sr:.2f}s at {sr}Hz")
        return sr, wav.astype(np.float32)

    except Exception as e:
        logger.error(f"Voice synthesis failed: {e}")
        # Return fallback audio
        return _create_fallback_audio(text, sr)

def _resample_audio(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    """Resample audio using the best available method."""
    try:
        import librosa
        return librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)
    except ImportError:
        # Simple linear-interpolation resampling without librosa
        ratio = target_sr / orig_sr
        new_length = int(len(audio) * ratio)
        return np.interp(
            np.linspace(0, len(audio), new_length),
            np.arange(len(audio)),
            audio
        )

def _create_fallback_audio(text: str, sr: int) -> Tuple[int, np.ndarray]:
    """Create fallback audio when TTS fails."""
    try:
        # Create a simple tone whose length is based on the text length
        duration = max(1.0, len(text) / 20.0)  # Rough estimate
        t = np.linspace(0, duration, int(sr * duration), endpoint=False)

        # Generate a simple tone
        frequency = 440.0  # A4 note
        wav = 0.1 * np.sin(2 * np.pi * frequency * t)

        # Add some variation
        wav += 0.05 * np.sin(2 * np.pi * frequency * 1.5 * t)

        logger.info(f"Created fallback audio: {duration:.2f}s")
        return sr, wav.astype(np.float32)

    except Exception as e:
        logger.error(f"Failed to create fallback audio: {e}")
        # Last resort: silence
        duration = 2.0
        wav = np.zeros(int(sr * duration))
        return sr, wav.astype(np.float32)

def normalize_audio(audio: np.ndarray, target_lufs: float = -23.0) -> np.ndarray:
    """Normalize audio toward broadcast levels (peak normalization plus gentle
    compression as a cheap stand-in for true LUFS loudness normalization)."""
    # Simple peak normalization first
    if np.max(np.abs(audio)) > 0:
        audio = audio / np.max(np.abs(audio)) * 0.95

    # Apply gentle compression
    audio = apply_compression(audio)

    return audio

def apply_compression(audio: np.ndarray, ratio: float = 3.0, threshold: float = 0.7) -> np.ndarray:
    """Apply gentle compression for broadcast quality."""
    # Simple soft-knee compression
    compressed = np.copy(audio)

    # Above the threshold, apply compression
    above_threshold = np.abs(audio) > threshold
    compressed[above_threshold] = np.sign(audio[above_threshold]) * (
        threshold + (np.abs(audio[above_threshold]) - threshold) / ratio
    )

    return compressed

def retro_bed(duration_s: float, sr: int = AUDIO_SAMPLE_RATE, bpm: int = 92):
    """Generate a retro synth background music bed."""
    try:
        t = np.linspace(0, duration_s, int(sr * duration_s), endpoint=False)

        # Chord progression root frequencies (A minor style)
        freqs = [220.0, 174.61, 196.0, 146.83]
        seg_len = int(len(t) / len(freqs)) if len(freqs) else len(t)
        sig = np.zeros_like(t)

        for i, f0 in enumerate(freqs):
            tri_t = t[i * seg_len:(i + 1) * seg_len]
            tri = 2 * np.abs(2 * ((tri_t * f0) % 1) - 1) - 1
            sig[i * seg_len:(i + 1) * seg_len] = 0.15 * tri

        # Add tape noise
        noise = 0.01 * np.random.randn(len(t))
        bed = sig + noise

        # Apply a gentle lowpass filter
        try:
            from scipy import signal
            b, a = signal.butter(3, 3000, 'low', fs=sr)
            bed = signal.lfilter(b, a, bed)
        except ImportError:
            # Simple moving-average filter if scipy is not available
            bed = np.convolve(bed, np.ones(5) / 5, mode='same')

        return sr, bed.astype(np.float32)

    except Exception as e:
        logger.error(f"Failed to generate retro bed: {e}")
        # Return silence
        silence = np.zeros(int(sr * duration_s))
        return sr, silence.astype(np.float32)

def mix_to_stereo(sr1, a, sr2, b, bed_gain=0.5):
    """Mix two mono signals into a stereo pair."""
    assert sr1 == sr2, "Sample rates must match"

    n = max(len(a), len(b))

    def pad(x):
        if len(x) < n:
            if len(x.shape) > 1:  # Stereo
                padding = np.zeros((n - len(x), x.shape[1]))
            else:  # Mono
                padding = np.zeros(n - len(x))
            x = np.concatenate([x, padding])
        return x

    a = pad(a)
    b = pad(b)

    left = a + bed_gain * b
    right = a * 0.9 + bed_gain * 0.9 * b

    # Stack the two channels (the original branched on left.shape here, but
    # both branches performed the same stack)
    stereo = np.stack([left, right], axis=1)

    return sr1, np.clip(stereo, -1.0, 1.0)

def write_wav(path: str, sr: int, wav: np.ndarray):
    """Write audio to a WAV file."""
    try:
        import soundfile as sf
        sf.write(path, wav, sr)
    except ImportError:
        # Fallback using scipy
        try:
            from scipy.io import wavfile
            # Convert to 16-bit
            wav_16bit = (wav * 32767).astype(np.int16)
            wavfile.write(path, sr, wav_16bit)
        except ImportError:
            logger.error("No audio writing library available (soundfile or scipy)")
            raise RuntimeError("Cannot write audio file - no audio library available")
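
An end-to-end sketch of the audio path using only the helpers above (the VO line and voice prompt are invented examples):

from utils_audio import synth_voice, retro_bed, mix_to_stereo, write_wav

sr, vo = synth_voice("Try RetroCo today.", voice_prompt="warm 1980s announcer")
_, bed = retro_bed(duration_s=len(vo) / sr, sr=sr)  # bed matched to the VO length
sr, stereo = mix_to_stereo(sr, vo, sr, bed, bed_gain=0.4)
write_wav("ad_audio.wav", sr, stereo)
print(stereo.shape)  # (n_samples, 2)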
utils_video.py
ADDED
@@ -0,0 +1,336 @@
"""
Video processing utilities for EceMotion Pictures.
Enhanced text-to-video generation with robust error handling and fallbacks.
"""

import numpy as np
import logging
import os
import shutil
from typing import Optional, Tuple, List
from pathlib import Path

from config import (
    MODEL_VIDEO, MODEL_CONFIGS, get_device, VHS_INTENSITY, SCANLINE_OPACITY,
    CHROMATIC_ABERRATION, FILM_GRAIN, get_safe_model_name
)

logger = logging.getLogger(__name__)

# Global model cache
t2v_pipe = None
current_model = None

def get_t2v_pipe(device: str, model_name: str = MODEL_VIDEO):
    """Get or create the T2V pipeline with lazy loading and model switching."""
    global t2v_pipe, current_model

    # Use safe model name
    safe_model_name = get_safe_model_name(model_name, "video")

    if t2v_pipe is None or current_model != safe_model_name:
        logger.info(f"Loading T2V model: {safe_model_name}")

        try:
            if "cogvideox" in safe_model_name.lower():
                # Try CogVideoX first
                t2v_pipe = _load_cogvideox(safe_model_name, device)
            else:
                # Use the standard diffusers pipeline
                t2v_pipe = _load_standard_t2v(safe_model_name, device)

            if t2v_pipe is not None:
                current_model = safe_model_name
                logger.info(f"T2V model {safe_model_name} loaded successfully")
            else:
                raise RuntimeError("Failed to load any T2V model")

        except Exception as e:
            logger.error(f"Failed to load {safe_model_name}: {e}")
            # Fall back to the default model
            t2v_pipe = _load_standard_t2v("damo-vilab/text-to-video-ms-1.7b", device)
            current_model = "damo-vilab/text-to-video-ms-1.7b"

    return t2v_pipe

def _load_cogvideox(model_name: str, device: str):
    """Load a CogVideoX model."""
    try:
        from diffusers import CogVideoXPipeline

        pipe = CogVideoXPipeline.from_pretrained(
            model_name,
            torch_dtype="auto",
            trust_remote_code=True
        )

        if device == "cuda":
            pipe = pipe.to(device)

        return pipe

    except Exception as e:
        logger.error(f"Failed to load CogVideoX: {e}")
        return None

def _load_standard_t2v(model_name: str, device: str):
    """Load a standard T2V model."""
    try:
        from diffusers import TextToVideoSDPipeline

        pipe = TextToVideoSDPipeline.from_pretrained(
            model_name,
            torch_dtype="auto"
        )

        if device == "cuda":
            pipe = pipe.to(device)

        return pipe

    except Exception as e:
        logger.error(f"Failed to load standard T2V: {e}")
        return None

def synth_t2v(prompt: str, seed: int, num_frames: int = 32, fps: int = 8,
              device: str = None, model_name: str = MODEL_VIDEO):
    """
    Generate text-to-video with enhanced model support and frame control.
    """
    if device is None:
        device = get_device()

    pipe = get_t2v_pipe(device, model_name)
    model_config = MODEL_CONFIGS.get(current_model, {})

    # Validate frame count against model limits
    max_frames = model_config.get("max_frames", 32)
    min_frames = model_config.get("min_frames", 8)
    num_frames = max(min_frames, min(num_frames, max_frames))

    logger.info(f"Generating {num_frames} frames at {fps}fps with {current_model}")

    try:
        # Set up generator
        import torch
        generator = torch.Generator(device=device).manual_seed(seed)

        # Generate frames based on model type
        if "cogvideox" in current_model.lower():
            # CogVideoX specific generation
            result = pipe(
                prompt=prompt,
                num_frames=num_frames,
                generator=generator,
                guidance_scale=7.5,
                num_inference_steps=20
            )
            frames = result.frames[0]  # diffusers returns a batch; take the first video
        else:
            # Standard pipeline
            result = pipe(
                prompt=prompt,
                num_frames=num_frames,
                generator=generator
            )
            frames = result.frames[0]  # diffusers returns a batch; take the first video

        # Convert to uint8 numpy arrays (pipelines may return PIL images or
        # float arrays in [0, 1], and ImageSequenceClip expects uint8)
        frame_arrays = []
        for frame in frames:
            arr = np.asarray(frame)
            if arr.dtype != np.uint8:
                arr = (np.clip(arr, 0.0, 1.0) * 255).astype(np.uint8)
            frame_arrays.append(arr)

        # Create the clip using moviepy
        from moviepy.editor import ImageSequenceClip
        clip = ImageSequenceClip(frame_arrays, fps=fps)

        logger.info(f"Generated video clip: {clip.duration:.2f}s, {len(frame_arrays)} frames")
        return clip

    except Exception as e:
        logger.error(f"Video generation failed: {e}")
        # Return a simple fallback clip
        return _create_fallback_clip(prompt, num_frames, fps)

def _create_fallback_clip(prompt: str, num_frames: int, fps: int):
    """Create a simple fallback clip when video generation fails."""
    try:
        from moviepy.editor import ColorClip, TextClip, CompositeVideoClip

        # Create a simple colored background
        background = ColorClip(size=(640, 480), color=(100, 50, 200), duration=num_frames / fps)

        # Add a text overlay (TextClip requires ImageMagick, hence the fallback below)
        text = TextClip(
            prompt[:50] + "..." if len(prompt) > 50 else prompt,
            fontsize=24,
            color='white',
            font='Arial-Bold'
        ).set_position('center').set_duration(num_frames / fps)

        # Composite the clips
        clip = CompositeVideoClip([background, text])

        logger.info(f"Created fallback clip: {clip.duration:.2f}s")
        return clip

    except Exception as e:
        logger.error(f"Failed to create fallback clip: {e}")
        # Last resort: create a plain color clip
        from moviepy.editor import ColorClip
        return ColorClip(size=(640, 480), color=(100, 50, 200), duration=5.0)

def apply_retro_filters(input_path: str, output_path: str, intensity: float = VHS_INTENSITY):
    """
    Apply authentic VHS/CRT effects with enhanced visual artifacts.
    """
    logger.info(f"Applying retro filters with intensity {intensity}")

    # Check if ffmpeg is available
    if not _check_ffmpeg():
        logger.warning("ffmpeg not available, using simple filter")
        _apply_simple_retro_filters(input_path, output_path)
        return

    try:
        # Build a filter chain for an authentic VHS look
        filters = []

        # 1. Format conversion
        filters.append('format=yuv420p')

        # 2. Basic color grading for the retro look
        filters.append(f'hue=s={0.8 + 0.2 * intensity}')
        filters.append(f'eq=brightness={0.02 * intensity}:contrast={1.0 + 0.1 * intensity}:saturation={1.0 + 0.2 * intensity}:gamma={1.0 - 0.05 * intensity}')

        # 3. VHS tracking lines and noise
        if intensity > 0.3:
            filters.append(f'tblend=all_mode=difference:all_opacity={0.05 * intensity}')
            filters.append(f'noise=alls={int(20 * intensity)}:allf=t')

        # 4. Film grain
        if FILM_GRAIN > 0:
            grain = FILM_GRAIN * intensity
            filters.append(f'noise=alls={int(15 * grain)}:allf=u')

        # 5. Vignetting (the vignette filter has no strength option, so only
        # the angle is set; a second positional argument would be x0)
        filters.append('vignette=angle=PI/4')

        # Apply the chained simple filters through a single -vf option;
        # ffmpeg-python streams have no filter_complex() method.
        import ffmpeg

        stream = ffmpeg.input(input_path)
        stream = stream.output(
            output_path,
            vf=','.join(filters),
            vcodec='libx264',
            pix_fmt='yuv420p',
            crf=20,  # Good quality
            preset='medium',
            movflags='+faststart'
        )

        stream.overwrite_output().run(quiet=True)
        logger.info("Retro filters applied successfully")

    except Exception as e:
        logger.error(f"Failed to apply retro filters: {e}")
        # Fall back to the simple filter
        _apply_simple_retro_filters(input_path, output_path)

def _check_ffmpeg() -> bool:
    """Check if ffmpeg is available."""
    try:
        import subprocess
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
        return True
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False

def _apply_simple_retro_filters(input_path: str, output_path: str):
    """Fallback simple retro filter application."""
    try:
        import ffmpeg

        (
            ffmpeg
            .input(input_path)
            .filter('format', 'yuv420p')
            .filter('tblend', all_mode='difference', all_opacity=0.05)
            .filter('hue', s=0.9)
            .filter('eq', brightness=0.02, contrast=1.05, saturation=1.1, gamma=0.98)
            .filter('noise', alls=10)
            .output(output_path, vcodec='libx264', pix_fmt='yuv420p', crf=20, movflags='+faststart')
            .overwrite_output()
            .run(quiet=True)
        )
        logger.info("Simple retro filters applied as fallback")
    except Exception as e:
        logger.error(f"Even the simple retro filters failed: {e}")
        # Just copy the file
        shutil.copy2(input_path, output_path)

def mux_audio(video_in: str, audio_in: str, out_path: str):
    """Mux video and audio with error handling."""
    try:
        if _check_ffmpeg():
            _mux_with_ffmpeg(video_in, audio_in, out_path)
        else:
            _mux_with_moviepy(video_in, audio_in, out_path)
    except Exception as e:
        logger.error(f"Audio muxing failed: {e}")
        # Fallback: just copy video
        shutil.copy2(video_in, out_path)

def _mux_with_ffmpeg(video_in: str, audio_in: str, out_path: str):
    """Mux using ffmpeg."""
    import ffmpeg

    # Two inputs must be opened independently and both passed to output();
    # chaining .input() on an existing stream is not part of the
    # ffmpeg-python API.
    video = ffmpeg.input(video_in)
    audio = ffmpeg.input(audio_in)
    (
        ffmpeg
        .output(video, audio, out_path, vcodec='copy', acodec='aac',
                audio_bitrate='128k', movflags='+faststart')
        .overwrite_output()
        .run(quiet=True)
    )

def _mux_with_moviepy(video_in: str, audio_in: str, out_path: str):
    """Mux using moviepy (fallback)."""
    from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips

    # Load video and audio
    video = VideoFileClip(video_in)
    audio = AudioFileClip(audio_in)

    # Set audio duration to match video
    if audio.duration > video.duration:
        audio = audio.subclip(0, video.duration)
    elif audio.duration < video.duration:
        # Pad audio with silence (fps is required so the silence can be
        # rendered; concatenation is a module-level function, not a method)
        from moviepy.audio.AudioClip import AudioClip
        silence = AudioClip(lambda t: 0, duration=video.duration - audio.duration,
                            fps=audio.fps)
        audio = concatenate_audioclips([audio, silence])

    # Combine and write
    final_video = video.set_audio(audio)
    final_video.write_videofile(
        out_path,
        codec='libx264',
        audio_codec='aac',
        temp_audiofile='temp-audio.m4a',
        remove_temp=True,
        verbose=False,
        logger=None
    )

    # Clean up
    video.close()
    audio.close()
    final_video.close()