drvsbrkcn committed on
Commit
869d082
·
verified ·
1 Parent(s): 321aaaf

Upload 3 files

Browse files
Files changed (3) hide show
  1. llm_script_generator.py +12 -11
  2. utils_audio.py +7 -1
  3. utils_video.py +34 -18
llm_script_generator.py CHANGED
@@ -215,17 +215,17 @@ Make it authentic to 1980s TV commercials with the energy and style of that era.
215
  inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
216
 
217
  # Generate
218
- with self.model.eval():
219
- outputs = self.model.generate(
220
- **inputs,
221
- max_new_tokens=self.model_config.get("max_tokens", 256),
222
- temperature=self.model_config.get("temperature", 0.7),
223
- top_p=self.model_config.get("top_p", 0.9),
224
- do_sample=True,
225
- pad_token_id=self.tokenizer.eos_token_id,
226
- eos_token_id=self.tokenizer.eos_token_id,
227
- num_return_sequences=1
228
- )
229
 
230
  # Decode response
231
  response = self.tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
@@ -386,3 +386,4 @@ Make it authentic to 1980s TV commercials with the energy and style of that era.
386
def create_script_generator() -> LLMScriptGenerator:
    """Build and return a fresh LLMScriptGenerator instance.

    Thin factory wrapper so callers can obtain a generator without
    referencing the class directly.
    """
    generator = LLMScriptGenerator()
    return generator
 
 
215
  inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
216
 
217
  # Generate
218
+ self.model.eval()
219
+ outputs = self.model.generate(
220
+ **inputs,
221
+ max_new_tokens=self.model_config.get("max_tokens", 256),
222
+ temperature=self.model_config.get("temperature", 0.7),
223
+ top_p=self.model_config.get("top_p", 0.9),
224
+ do_sample=True,
225
+ pad_token_id=self.tokenizer.eos_token_id,
226
+ eos_token_id=self.tokenizer.eos_token_id,
227
+ num_return_sequences=1
228
+ )
229
 
230
  # Decode response
231
  response = self.tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
 
386
def create_script_generator() -> LLMScriptGenerator:
    """Build and return a fresh LLMScriptGenerator instance.

    Thin factory wrapper so callers can obtain a generator without
    referencing the class directly.
    """
    generator = LLMScriptGenerator()
    return generator
389
+
utils_audio.py CHANGED
@@ -75,11 +75,16 @@ def _load_standard_tts(model_name: str, device: str):
75
  """Load standard TTS model."""
76
  try:
77
  from transformers import pipeline
 
 
 
 
 
78
 
79
  pipe = pipeline(
80
  "text-to-speech",
81
  model=model_name,
82
- torch_dtype="auto"
83
  )
84
 
85
  if device == "cuda":
@@ -290,3 +295,4 @@ def write_wav(path: str, sr: int, wav: np.ndarray):
290
  except ImportError:
291
  logger.error("No audio writing library available (soundfile or scipy)")
292
  raise RuntimeError("Cannot write audio file - no audio library available")
 
 
75
  """Load standard TTS model."""
76
  try:
77
  from transformers import pipeline
78
+ import torch
79
+
80
+ # Fix device string - convert "auto" to proper device
81
+ if device == "auto":
82
+ device = "cuda" if torch.cuda.is_available() else "cpu"
83
 
84
  pipe = pipeline(
85
  "text-to-speech",
86
  model=model_name,
87
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32
88
  )
89
 
90
  if device == "cuda":
 
295
  except ImportError:
296
  logger.error("No audio writing library available (soundfile or scipy)")
297
  raise RuntimeError("Cannot write audio file - no audio library available")
298
+
utils_video.py CHANGED
@@ -77,10 +77,15 @@ def _load_standard_t2v(model_name: str, device: str):
77
  """Load standard T2V model."""
78
  try:
79
  from diffusers import TextToVideoSDPipeline
 
 
 
 
 
80
 
81
  pipe = TextToVideoSDPipeline.from_pretrained(
82
  model_name,
83
- torch_dtype="auto"
84
  )
85
 
86
  if device == "cuda":
@@ -153,21 +158,11 @@ def synth_t2v(prompt: str, seed: int, num_frames: int = 32, fps: int = 8,
153
  def _create_fallback_clip(prompt: str, num_frames: int, fps: int):
154
  """Create a simple fallback clip when video generation fails."""
155
  try:
156
- from moviepy.editor import ColorClip, TextClip, CompositeVideoClip
157
-
158
- # Create a simple colored background
159
- background = ColorClip(size=(640, 480), color=(100, 50, 200), duration=num_frames/fps)
160
-
161
- # Add text overlay
162
- text = TextClip(
163
- prompt[:50] + "..." if len(prompt) > 50 else prompt,
164
- fontsize=24,
165
- color='white',
166
- font='Arial-Bold'
167
- ).set_position('center').set_duration(num_frames/fps)
168
 
169
- # Composite the clips
170
- clip = CompositeVideoClip([background, text])
 
171
 
172
  logger.info(f"Created fallback clip: {clip.duration:.2f}s")
173
  return clip
@@ -219,10 +214,30 @@ def apply_retro_filters(input_path: str, output_path: str, intensity: float = VH
219
 
220
  stream = ffmpeg.input(input_path)
221
 
222
- # Apply filter chain
223
  if len(filters) > 1:
224
- filter_string = ','.join(filters)
225
- stream = stream.filter_complex(filter_string)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  else:
227
  stream = stream.filter('format', 'yuv420p')
228
 
@@ -334,3 +349,4 @@ def _mux_with_moviepy(video_in: str, audio_in: str, out_path: str):
334
  video.close()
335
  audio.close()
336
  final_video.close()
 
 
77
  """Load standard T2V model."""
78
  try:
79
  from diffusers import TextToVideoSDPipeline
80
+ import torch
81
+
82
+ # Fix device string - convert "auto" to proper device
83
+ if device == "auto":
84
+ device = "cuda" if torch.cuda.is_available() else "cpu"
85
 
86
  pipe = TextToVideoSDPipeline.from_pretrained(
87
  model_name,
88
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32
89
  )
90
 
91
  if device == "cuda":
 
158
  def _create_fallback_clip(prompt: str, num_frames: int, fps: int):
159
  """Create a simple fallback clip when video generation fails."""
160
  try:
161
+ from moviepy.editor import ColorClip
 
 
 
 
 
 
 
 
 
 
 
162
 
163
+ # Create a simple colored background without text (avoids ImageMagick issues)
164
+ duration = num_frames / fps
165
+ clip = ColorClip(size=(640, 480), color=(100, 50, 200), duration=duration)
166
 
167
  logger.info(f"Created fallback clip: {clip.duration:.2f}s")
168
  return clip
 
214
 
215
  stream = ffmpeg.input(input_path)
216
 
217
+ # Apply filter chain - use simple filters to avoid filter_complex issues
218
  if len(filters) > 1:
219
+ # Apply filters one by one to avoid filter_complex issues
220
+ for filter_str in filters:
221
+ if filter_str == 'format=yuv420p':
222
+ stream = stream.filter('format', 'yuv420p')
223
+ elif 'hue=' in filter_str:
224
+ s_val = filter_str.split('s=')[1]
225
+ stream = stream.filter('hue', s=float(s_val))
226
+ elif 'eq=' in filter_str:
227
+ # Extract eq parameters
228
+ eq_params = filter_str.split('eq=')[1]
229
+ parts = eq_params.split(':')
230
+ brightness = float(parts[0].split('=')[1]) if 'brightness=' in parts[0] else 0
231
+ contrast = float(parts[1].split('=')[1]) if 'contrast=' in parts[1] else 1
232
+ saturation = float(parts[2].split('=')[1]) if 'saturation=' in parts[2] else 1
233
+ stream = stream.filter('eq', brightness=brightness, contrast=contrast, saturation=saturation)
234
+ elif 'noise=' in filter_str:
235
+ alls_val = int(filter_str.split('alls=')[1].split(':')[0])
236
+ stream = stream.filter('noise', alls=alls_val)
237
+ elif 'vignette=' in filter_str:
238
+ angle = float(filter_str.split('vignette=')[1].split(':')[0])
239
+ strength = float(filter_str.split(':')[1])
240
+ stream = stream.filter('vignette', angle=angle, strength=strength)
241
  else:
242
  stream = stream.filter('format', 'yuv420p')
243
 
 
349
  video.close()
350
  audio.close()
351
  final_video.close()
352
+