ford442 committed · verified · Commit 4bdb840 · Parent: 8df2c50

Update app.py

Files changed (1)
  1. app.py (+25, -15)
app.py CHANGED
@@ -131,12 +131,10 @@ def use_last_frame_as_input(video_filepath):
         return None, gr.update()
     try:
         with imageio.get_reader(video_filepath) as reader:
-            # A robust way to get the last frame
             frame_list = list(reader)
             if not frame_list:
                 raise ValueError("Video file is empty or unreadable.")
             last_frame_np = frame_list[-1]
-
             pil_image = Image.fromarray(last_frame_np)
             output_image_path = os.path.join(tempfile.mkdtemp(), f"last_frame_{random.randint(10000,99999)}.png")
             pil_image.save(output_image_path)
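The hunk above extracts the last frame by materializing every frame with `list(reader)`, which holds the whole video in memory. A minimal sketch of a constant-memory alternative, assuming the same `imageio` reader; the helper name `read_last_frame` is hypothetical, not part of app.py:

```python
import imageio

def read_last_frame(video_filepath):
    # Hypothetical helper: iterate the reader instead of calling list(reader),
    # so only one decoded frame is held in memory at a time.
    last_frame = None
    with imageio.get_reader(video_filepath) as reader:
        for frame in reader:      # decodes frames one by one
            last_frame = frame    # keep only the most recent frame
    if last_frame is None:
        raise ValueError("Video file is empty or unreadable.")
    return last_frame
```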
@@ -165,8 +163,6 @@ def stitch_videos(clips_list):
 def clear_clips():
     return [], "Clips created: 0", None, None
 
-# --- MODIFIED ---
-# The function signature has been reordered to match the way the inputs are assembled later.
 @spaces.GPU(duration=get_duration)
 def generate(prompt, negative_prompt, clips_list, input_image_filepath, input_video_filepath,
              height_ui, width_ui, mode, duration_ui, ui_frames_to_use,
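The deleted comments point at a real constraint: Gradio passes component values to a callback positionally, in the order of the `inputs=` list, so the list must mirror the function signature (the last hunk keeps the two in sync). A minimal sketch with hypothetical components, not the real app.py wiring:

```python
import gradio as gr

def generate(prompt, height, width):
    return f"{prompt} @ {int(width)}x{int(height)}"

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    height = gr.Number(label="Height", value=512)
    width = gr.Number(label="Width", value=704)
    out = gr.Textbox(label="Result")
    # The inputs list is ordered exactly like generate(prompt, height, width).
    gr.Button("Run").click(generate, inputs=[prompt, height, width], outputs=[out])
```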
@@ -192,17 +188,33 @@ def generate(prompt, negative_prompt, clips_list, input_image_filepath, input_vi
     height_padded = ((actual_height - 1) // 32 + 1) * 32
     width_padded = ((actual_width - 1) // 32 + 1) * 32
     padding_values = calculate_padding(actual_height, actual_width, height_padded, width_padded)
-
+    num_frames_padded = max(9, ((actual_num_frames - 2) // 8 + 1) * 8 + 1)
+
+    # --- MODIFIED ---
+    # Reverted to the explicit dictionary construction to ensure all required keys are present.
     call_kwargs = {
-        "prompt": prompt, "negative_prompt": negative_prompt, "height": height_padded, "width": width_padded,
-        "num_frames": max(9, ((actual_num_frames - 2) // 8 + 1) * 8 + 1), "num_inference_steps": num_steps, "frame_rate": int(fps),
-        "generator": torch.Generator(device=target_inference_device).manual_seed(int(seed_ui)), "output_type": "pt",
-        **PIPELINE_CONFIG_YAML,
-        "is_video": True, "mixed_precision": (PIPELINE_CONFIG_YAML["precision"] == "mixed_precision"),
-        "offload_to_cpu": False, "enhance_prompt": False,
+        "prompt": prompt,
+        "negative_prompt": negative_prompt,
+        "height": height_padded,
+        "width": width_padded,
+        "num_frames": num_frames_padded,
+        "num_inference_steps": num_steps,
+        "frame_rate": int(fps),
+        "generator": torch.Generator(device=target_inference_device).manual_seed(int(seed_ui)),
+        "output_type": "pt",
+        "conditioning_items": None,
+        "media_items": None,
+        "decode_timestep": PIPELINE_CONFIG_YAML["decode_timestep"],
+        "decode_noise_scale": PIPELINE_CONFIG_YAML["decode_noise_scale"],
+        "stochastic_sampling": PIPELINE_CONFIG_YAML["stochastic_sampling"],
+        "image_cond_noise_scale": 0.15,
+        "is_video": True,
+        "vae_per_channel_normalize": True,
+        "mixed_precision": (PIPELINE_CONFIG_YAML["precision"] == "mixed_precision"),
+        "offload_to_cpu": False,
+        "enhance_prompt": False,
     }
 
-    # This logic can be simplified and organized
     stg_mode_str = PIPELINE_CONFIG_YAML.get("stg_mode", "attention_values").lower()
     stg_map = {
         "stg_av": SkipLayerStrategy.AttentionValues, "attention_values": SkipLayerStrategy.AttentionValues,
@@ -316,9 +328,7 @@ with gr.Blocks(css=css) as demo:
     image_tab.select(update_task_image, outputs=[mode])
     text_tab.select(update_task_text, outputs=[mode])
     video_tab.select(update_task_video, outputs=[mode])
-
-    # --- MODIFIED ---
-    # The input lists are now defined explicitly and in the correct order to match the `generate` function signature.
+
     common_params = [
         height_input, width_input, mode, duration_input, frames_to_use,
         seed_input, randomize_seed_input, guidance_scale_input, improve_texture, num_steps, fps
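The core change of this commit, in the third hunk, replaces `**PIPELINE_CONFIG_YAML` inside `call_kwargs` with explicitly listed keys. A small illustration of why the splat is fragile; the values below are made up, not the real config:

```python
cfg = {"is_video": False, "precision": "mixed_precision", "sampler": "euler"}

# Splatting forwards every key, and literals placed after the splat silently
# override splatted values.
merged = {"prompt": "a cat", **cfg, "is_video": True}
assert merged["is_video"] is True  # the later literal wins

def pipeline(prompt, is_video, precision):
    return prompt, is_video, precision

# pipeline(**merged) would raise TypeError here: 'sampler' is not a parameter
# of pipeline(). Listing the keys explicitly avoids passing unexpected kwargs.
```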
 