Spaces:

drvsbrkcn
/

EceMotion_Pictures

Paused

drvsbrkcn committed on Oct 12

Commit

404465b

verified ·

1 Parent(s): 869d082

Upload 3 files

Files changed (3) hide show

app.py CHANGED Viewed

@@ -278,8 +278,8 @@ def create_interface():
                 logger.error(f"Script suggestion failed: {e}")
                 return "Back to '87 - the future is now!"
-        @spaces.GPU
-        def generate_commercial(
             brand_name: str,
             structure_text: str,
             script_text: str,
@@ -334,7 +334,7 @@ def create_interface():
                     video_prompt = f"{structure_text}. {script_text}. 1980s commercial, VHS texture, soft lighting, bold retro titles, 4:3, brand {brand_name}"
                     # Calculate optimal frame count
-                    num_frames = sync_manager.get_optimal_frame_count(duration_val, DEFAULT_FPS)
                     clip = synth_t2v(
                         prompt=video_prompt,

                 logger.error(f"Script suggestion failed: {e}")
                 return "Back to '87 - the future is now!"
+@spaces.GPU(timeout=120)  # 2 minute timeout for ZeroGPU
+def generate_commercial(
             brand_name: str,
             structure_text: str,
             script_text: str,
                     video_prompt = f"{structure_text}. {script_text}. 1980s commercial, VHS texture, soft lighting, bold retro titles, 4:3, brand {brand_name}"
                     # Calculate optimal frame count
+                    num_frames = min(sync_manager.get_optimal_frame_count(duration_val, DEFAULT_FPS), 16)  # Cap for ZeroGPU
                     clip = synth_t2v(
                         prompt=video_prompt,

llm_script_generator.py CHANGED Viewed

@@ -214,6 +214,10 @@ Make it authentic to 1980s TV commercials with the energy and style of that era.
         # Tokenize
         inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
         # Generate
         self.model.eval()
         outputs = self.model.generate(

         # Tokenize
         inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
+        # Move inputs to same device as model
+        device = next(self.model.parameters()).device
+        inputs = {k: v.to(device) for k, v in inputs.items()}
         # Generate
         self.model.eval()
         outputs = self.model.generate(

utils_video.py CHANGED Viewed

@@ -97,7 +97,7 @@ def _load_standard_t2v(model_name: str, device: str):
         logger.error(f"Failed to load standard T2V: {e}")
         return None
-def synth_t2v(prompt: str, seed: int, num_frames: int = 32, fps: int = 8,
               device: str = None, model_name: str = MODEL_VIDEO):
     """
     Generate text-to-video with enhanced model support and frame control.
@@ -108,11 +108,16 @@ def synth_t2v(prompt: str, seed: int, num_frames: int = 32, fps: int = 8,
     pipe = get_t2v_pipe(device, model_name)
     model_config = MODEL_CONFIGS.get(current_model, {})
-    # Validate frame count against model limits
-    max_frames = model_config.get("max_frames", 32)
     min_frames = model_config.get("min_frames", 8)
     num_frames = max(min_frames, min(num_frames, max_frames))
     logger.info(f"Generating {num_frames} frames at {fps}fps with {current_model}")
     try:
@@ -122,13 +127,13 @@ def synth_t2v(prompt: str, seed: int, num_frames: int = 32, fps: int = 8,
         # Generate frames based on model type
         if "cogvideox" in current_model.lower():
-            # CogVideoX specific generation
             result = pipe(
                 prompt=prompt,
                 num_frames=num_frames,
                 generator=generator,
-                guidance_scale=7.5,
-                num_inference_steps=20
             )
             frames = result.frames
         else:

         logger.error(f"Failed to load standard T2V: {e}")
         return None
+def synth_t2v(prompt: str, seed: int, num_frames: int = 16, fps: int = 8,
               device: str = None, model_name: str = MODEL_VIDEO):
     """
     Generate text-to-video with enhanced model support and frame control.
     pipe = get_t2v_pipe(device, model_name)
     model_config = MODEL_CONFIGS.get(current_model, {})
+    # Validate frame count against model limits (reduced for ZeroGPU)
+    max_frames = model_config.get("max_frames", 16)  # Reduced from 32
     min_frames = model_config.get("min_frames", 8)
     num_frames = max(min_frames, min(num_frames, max_frames))
+    # Force lower frame count for ZeroGPU timeout limits
+    if num_frames > 16:
+        num_frames = 16
+        logger.info(f"Reduced frame count to {num_frames} for ZeroGPU compatibility")
     logger.info(f"Generating {num_frames} frames at {fps}fps with {current_model}")
     try:
         # Generate frames based on model type
         if "cogvideox" in current_model.lower():
+            # CogVideoX specific generation (optimized for ZeroGPU)
             result = pipe(
                 prompt=prompt,
                 num_frames=num_frames,
                 generator=generator,
+                guidance_scale=5.0,      # Reduced for speed
+                num_inference_steps=10   # Reduced for speed
             )
             frames = result.frames
         else: