Upload 3 files

- llm_script_generator.py  +12 -11
- utils_audio.py  +7 -1
- utils_video.py  +34 -18
llm_script_generator.py
CHANGED

@@ -215,17 +215,17 @@ Make it authentic to 1980s TV commercials with the energy and style of that era.
         inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
 
         # Generate
-        [11 lines removed; their content is not captured in this view]
+        self.model.eval()
+        outputs = self.model.generate(
+            **inputs,
+            max_new_tokens=self.model_config.get("max_tokens", 256),
+            temperature=self.model_config.get("temperature", 0.7),
+            top_p=self.model_config.get("top_p", 0.9),
+            do_sample=True,
+            pad_token_id=self.tokenizer.eos_token_id,
+            eos_token_id=self.tokenizer.eos_token_id,
+            num_return_sequences=1
+        )
 
         # Decode response
         response = self.tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)

@@ -386,3 +386,4 @@ Make it authentic to 1980s TV commercials with the energy and style of that era.
 def create_script_generator() -> LLMScriptGenerator:
     """Factory function to create a script generator."""
     return LLMScriptGenerator()
+
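The new generation block switches to explicit sampling (temperature plus top-p) and puts the model in eval mode before calling generate. The decode line that follows keeps only the newly generated tokens: for a decoder-only model, generate returns the prompt followed by the completion, so slicing outputs[0] past inputs['input_ids'].shape[1] strips the prompt before decoding. A minimal self-contained sketch of the same pattern, using an illustrative checkpoint ("gpt2") and prompt rather than anything from this repo:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative checkpoint, not the repo's configured model.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # gpt2 ships without a pad token

inputs = tokenizer("Write a 1980s TV jingle:", return_tensors="pt")

model.eval()
with torch.no_grad():  # inference only, no gradient buffers
    outputs = model.generate(
        **inputs,
        max_new_tokens=64,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

# Decoder-only models echo the prompt, so slice it off before decoding.
completion = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:],
                              skip_special_tokens=True)
print(completion)

Note that the prompt-stripping slice assumes a decoder-only model; an encoder-decoder model (T5-style) returns only new tokens, and the same slice would cut into real output.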
utils_audio.py
CHANGED

@@ -75,11 +75,16 @@ def _load_standard_tts(model_name: str, device: str):
     """Load standard TTS model."""
     try:
         from transformers import pipeline
+        import torch
+
+        # Fix device string - convert "auto" to proper device
+        if device == "auto":
+            device = "cuda" if torch.cuda.is_available() else "cpu"
 
         pipe = pipeline(
             "text-to-speech",
             model=model_name,
-            torch_dtype="
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32
         )
 
         if device == "cuda":

@@ -290,3 +295,4 @@ def write_wav(path: str, sr: int, wav: np.ndarray):
     except ImportError:
         logger.error("No audio writing library available (soundfile or scipy)")
         raise RuntimeError("Cannot write audio file - no audio library available")
+
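Both loader fixes in this commit follow the same two-step pattern: resolve the config's "auto" device to a concrete "cuda" or "cpu" string before anything downstream sees it, and pass a real torch.dtype (half precision only on GPU, where it actually pays off) instead of the string literal on the removed line, which is truncated in this view. A self-contained sketch of that pattern; the checkpoint name "suno/bark-small" is an assumption for illustration, not necessarily whatever model_name the repo's config supplies:

import torch
from transformers import pipeline

device = "auto"  # e.g. read from a config file
if device == "auto":
    device = "cuda" if torch.cuda.is_available() else "cpu"

# Checkpoint name is illustrative only.
pipe = pipeline(
    "text-to-speech",
    model="suno/bark-small",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device=device,
)

speech = pipe("Act now! Operators are standing by!")
print(speech["sampling_rate"], speech["audio"].shape)

Here the resolved device is handed straight to pipeline(); the patched loader instead defers GPU placement to the if device == "cuda": branch visible as context after the creation call.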
utils_video.py
CHANGED

@@ -77,10 +77,15 @@ def _load_standard_t2v(model_name: str, device: str):
     """Load standard T2V model."""
     try:
         from diffusers import TextToVideoSDPipeline
+        import torch
+
+        # Fix device string - convert "auto" to proper device
+        if device == "auto":
+            device = "cuda" if torch.cuda.is_available() else "cpu"
 
         pipe = TextToVideoSDPipeline.from_pretrained(
             model_name,
-            torch_dtype="
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32
         )
 
         if device == "cuda":

@@ -153,21 +158,11 @@ def synth_t2v(prompt: str, seed: int, num_frames: int = 32, fps: int = 8,
 def _create_fallback_clip(prompt: str, num_frames: int, fps: int):
     """Create a simple fallback clip when video generation fails."""
     try:
-        from moviepy.editor import ColorClip
-
-        # Create a simple colored background
-        background = ColorClip(size=(640, 480), color=(100, 50, 200), duration=num_frames/fps)
-
-        # Add text overlay
-        text = TextClip(
-            prompt[:50] + "..." if len(prompt) > 50 else prompt,
-            fontsize=24,
-            color='white',
-            font='Arial-Bold'
-        ).set_position('center').set_duration(num_frames/fps)
+        from moviepy.editor import ColorClip
 
-        #
-        [1 line removed; content not captured in this view]
+        # Create a simple colored background without text (avoids ImageMagick issues)
+        duration = num_frames / fps
+        clip = ColorClip(size=(640, 480), color=(100, 50, 200), duration=duration)
 
         logger.info(f"Created fallback clip: {clip.duration:.2f}s")
         return clip

@@ -219,10 +214,30 @@ apply_retro_filters(input_path: str, output_path: str, intensity: float = VH
 
     stream = ffmpeg.input(input_path)
 
-    # Apply filter chain
+    # Apply filter chain - use simple filters to avoid filter_complex issues
     if len(filters) > 1:
-        [2 lines removed; content not captured in this view]
+        # Apply filters one by one to avoid filter_complex issues
+        for filter_str in filters:
+            if filter_str == 'format=yuv420p':
+                stream = stream.filter('format', 'yuv420p')
+            elif 'hue=' in filter_str:
+                s_val = filter_str.split('s=')[1]
+                stream = stream.filter('hue', s=float(s_val))
+            elif 'eq=' in filter_str:
+                # Extract eq parameters
+                eq_params = filter_str.split('eq=')[1]
+                parts = eq_params.split(':')
+                brightness = float(parts[0].split('=')[1]) if 'brightness=' in parts[0] else 0
+                contrast = float(parts[1].split('=')[1]) if 'contrast=' in parts[1] else 1
+                saturation = float(parts[2].split('=')[1]) if 'saturation=' in parts[2] else 1
+                stream = stream.filter('eq', brightness=brightness, contrast=contrast, saturation=saturation)
+            elif 'noise=' in filter_str:
+                alls_val = int(filter_str.split('alls=')[1].split(':')[0])
+                stream = stream.filter('noise', alls=alls_val)
+            elif 'vignette=' in filter_str:
+                angle = float(filter_str.split('vignette=')[1].split(':')[0])
+                strength = float(filter_str.split(':')[1])
+                stream = stream.filter('vignette', angle=angle, strength=strength)
     else:
         stream = stream.filter('format', 'yuv420p')
 

@@ -334,3 +349,4 @@ def _mux_with_moviepy(video_in: str, audio_in: str, out_path: str):
     video.close()
     audio.close()
     final_video.close()
+
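The rewritten filter path replays each pre-built filter string as a structured ffmpeg-python .filter() call instead of handing ffmpeg one combined chain. The parsing is positional: the eq= branch assumes a brightness:contrast:saturation order, and the vignette= branch assumes exactly vignette=<angle>:<strength>. A standalone sketch of that parsing with illustrative filter strings (the real list is assembled elsewhere in apply_retro_filters and does not appear in this diff):

# Illustrative VHS-style filter strings, parsed the same way as the patch.
filters = [
    'eq=brightness=0.05:contrast=1.15:saturation=0.8',
    'hue=s=0.7',
    'noise=alls=12:allf=t',
    'format=yuv420p',
]

for filter_str in filters:
    if filter_str == 'format=yuv420p':
        print('format     ->', 'yuv420p')
    elif 'hue=' in filter_str:
        # 'hue=s=0.7' -> 0.7
        print('hue s      ->', float(filter_str.split('s=')[1]))
    elif 'eq=' in filter_str:
        # Positional: brightness, contrast, saturation, in that order.
        parts = filter_str.split('eq=')[1].split(':')
        values = [float(p.split('=')[1]) for p in parts]
        print('eq         ->', dict(zip(['brightness', 'contrast', 'saturation'], values)))
    elif 'noise=' in filter_str:
        # 'noise=alls=12:allf=t' -> 12
        print('noise alls ->', int(filter_str.split('alls=')[1].split(':')[0]))

One fragility worth noting: a reordered or malformed string raises ValueError or IndexError instead of being skipped, so this parser and the code that builds the filter strings have to stay in sync.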