Upload 3 files
Browse files
- app.py +3 -3
- llm_script_generator.py +4 -0
- utils_video.py +11 -6
app.py
CHANGED
|
@@ -278,8 +278,8 @@ def create_interface():
|
|
| 278 |
logger.error(f"Script suggestion failed: {e}")
|
| 279 |
return "Back to '87 - the future is now!"
|
| 280 |
|
| 281 |
-
|
| 282 |
-
|
| 283 |
brand_name: str,
|
| 284 |
structure_text: str,
|
| 285 |
script_text: str,
|
|
@@ -334,7 +334,7 @@ def create_interface():
|
|
| 334 |
video_prompt = f"{structure_text}. {script_text}. 1980s commercial, VHS texture, soft lighting, bold retro titles, 4:3, brand {brand_name}"
|
| 335 |
|
| 336 |
# Calculate optimal frame count
|
| 337 |
-
num_frames = sync_manager.get_optimal_frame_count(duration_val, DEFAULT_FPS)
|
| 338 |
|
| 339 |
clip = synth_t2v(
|
| 340 |
prompt=video_prompt,
|
|
|
|
| 278 |
logger.error(f"Script suggestion failed: {e}")
|
| 279 |
return "Back to '87 - the future is now!"
|
| 280 |
|
| 281 |
+
@spaces.GPU(timeout=120) # 2 minute timeout for ZeroGPU
|
| 282 |
+
def generate_commercial(
|
| 283 |
brand_name: str,
|
| 284 |
structure_text: str,
|
| 285 |
script_text: str,
|
|
|
|
| 334 |
video_prompt = f"{structure_text}. {script_text}. 1980s commercial, VHS texture, soft lighting, bold retro titles, 4:3, brand {brand_name}"
|
| 335 |
|
| 336 |
# Calculate optimal frame count
|
| 337 |
+
num_frames = min(sync_manager.get_optimal_frame_count(duration_val, DEFAULT_FPS), 16) # Cap for ZeroGPU
|
| 338 |
|
| 339 |
clip = synth_t2v(
|
| 340 |
prompt=video_prompt,
|
llm_script_generator.py
CHANGED
|
@@ -214,6 +214,10 @@ Make it authentic to 1980s TV commercials with the energy and style of that era.
|
|
| 214 |
# Tokenize
|
| 215 |
inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
# Generate
|
| 218 |
self.model.eval()
|
| 219 |
outputs = self.model.generate(
|
|
|
|
| 214 |
# Tokenize
|
| 215 |
inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
| 216 |
|
| 217 |
+
# Move inputs to same device as model
|
| 218 |
+
device = next(self.model.parameters()).device
|
| 219 |
+
inputs = {k: v.to(device) for k, v in inputs.items()}
|
| 220 |
+
|
| 221 |
# Generate
|
| 222 |
self.model.eval()
|
| 223 |
outputs = self.model.generate(
|
utils_video.py
CHANGED
|
@@ -97,7 +97,7 @@ def _load_standard_t2v(model_name: str, device: str):
|
|
| 97 |
logger.error(f"Failed to load standard T2V: {e}")
|
| 98 |
return None
|
| 99 |
|
| 100 |
-
def synth_t2v(prompt: str, seed: int, num_frames: int = 32, fps: int = 8,
|
| 101 |
device: str = None, model_name: str = MODEL_VIDEO):
|
| 102 |
"""
|
| 103 |
Generate text-to-video with enhanced model support and frame control.
|
|
@@ -108,11 +108,16 @@ def synth_t2v(prompt: str, seed: int, num_frames: int = 32, fps: int = 8,
|
|
| 108 |
pipe = get_t2v_pipe(device, model_name)
|
| 109 |
model_config = MODEL_CONFIGS.get(current_model, {})
|
| 110 |
|
| 111 |
-
# Validate frame count against model limits
|
| 112 |
-
max_frames = model_config.get("max_frames", 32)
|
| 113 |
min_frames = model_config.get("min_frames", 8)
|
| 114 |
num_frames = max(min_frames, min(num_frames, max_frames))
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
logger.info(f"Generating {num_frames} frames at {fps}fps with {current_model}")
|
| 117 |
|
| 118 |
try:
|
|
@@ -122,13 +127,13 @@ def synth_t2v(prompt: str, seed: int, num_frames: int = 32, fps: int = 8,
|
|
| 122 |
|
| 123 |
# Generate frames based on model type
|
| 124 |
if "cogvideox" in current_model.lower():
|
| 125 |
-
# CogVideoX specific generation
|
| 126 |
result = pipe(
|
| 127 |
prompt=prompt,
|
| 128 |
num_frames=num_frames,
|
| 129 |
generator=generator,
|
| 130 |
-
guidance_scale=
|
| 131 |
-
num_inference_steps=
|
| 132 |
)
|
| 133 |
frames = result.frames
|
| 134 |
else:
|
|
|
|
| 97 |
logger.error(f"Failed to load standard T2V: {e}")
|
| 98 |
return None
|
| 99 |
|
| 100 |
+
def synth_t2v(prompt: str, seed: int, num_frames: int = 16, fps: int = 8,
|
| 101 |
device: str = None, model_name: str = MODEL_VIDEO):
|
| 102 |
"""
|
| 103 |
Generate text-to-video with enhanced model support and frame control.
|
|
|
|
| 108 |
pipe = get_t2v_pipe(device, model_name)
|
| 109 |
model_config = MODEL_CONFIGS.get(current_model, {})
|
| 110 |
|
| 111 |
+
# Validate frame count against model limits (reduced for ZeroGPU)
|
| 112 |
+
max_frames = model_config.get("max_frames", 16) # Reduced from 32
|
| 113 |
min_frames = model_config.get("min_frames", 8)
|
| 114 |
num_frames = max(min_frames, min(num_frames, max_frames))
|
| 115 |
|
| 116 |
+
# Force lower frame count for ZeroGPU timeout limits
|
| 117 |
+
if num_frames > 16:
|
| 118 |
+
num_frames = 16
|
| 119 |
+
logger.info(f"Reduced frame count to {num_frames} for ZeroGPU compatibility")
|
| 120 |
+
|
| 121 |
logger.info(f"Generating {num_frames} frames at {fps}fps with {current_model}")
|
| 122 |
|
| 123 |
try:
|
|
|
|
| 127 |
|
| 128 |
# Generate frames based on model type
|
| 129 |
if "cogvideox" in current_model.lower():
|
| 130 |
+
# CogVideoX specific generation (optimized for ZeroGPU)
|
| 131 |
result = pipe(
|
| 132 |
prompt=prompt,
|
| 133 |
num_frames=num_frames,
|
| 134 |
generator=generator,
|
| 135 |
+
guidance_scale=5.0, # Reduced for speed
|
| 136 |
+
num_inference_steps=10 # Reduced for speed
|
| 137 |
)
|
| 138 |
frames = result.frames
|
| 139 |
else:
|