ford442 committed · Commit 92cd32c · verified · 1 Parent(s): 0b7096d

Update app.py

Files changed (1): app.py (+114 −76)
app.py CHANGED
@@ -1,3 +1,7 @@
 import spaces
 import os
 os.putenv('PYTORCH_NVML_BASED_CUDA_CHECK','1')
@@ -107,70 +111,79 @@ if latent_upsampler_instance:
     latent_upsampler_instance.to(target_inference_device)


-# --- Helper function for dimension calculation ---
-MIN_DIM_SLIDER = 256
-TARGET_FIXED_SIDE = 768
-
 def calculate_new_dimensions(orig_w, orig_h):
     if orig_w == 0 or orig_h == 0:
-        return int(TARGET_FIXED_SIDE), int(TARGET_FIXED_SIDE)
     if orig_w >= orig_h:
-        new_h = TARGET_FIXED_SIDE
-        aspect_ratio = orig_w / orig_h
-        new_w_ideal = new_h * aspect_ratio
-        new_w = round(new_w_ideal / 32) * 32
-        new_w = max(MIN_DIM_SLIDER, min(new_w, MAX_IMAGE_SIZE))
-        new_h = max(MIN_DIM_SLIDER, min(new_h, MAX_IMAGE_SIZE))
     else:
-        new_w = TARGET_FIXED_SIDE
-        aspect_ratio = orig_h / orig_w
-        new_h_ideal = new_w * aspect_ratio
-        new_h = round(new_h_ideal / 32) * 32
-        new_h = max(MIN_DIM_SLIDER, min(new_h, MAX_IMAGE_SIZE))
-        new_w = max(MIN_DIM_SLIDER, min(new_w, MAX_IMAGE_SIZE))
-    return int(new_h), int(new_w)
-
-def get_duration(prompt, negative_prompt, input_image_filepath, input_video_filepath,
-                 height_ui, width_ui, mode,
-                 duration_ui, ui_frames_to_use,
-                 seed_ui, randomize_seed, ui_guidance_scale, improve_texture_flag, num_steps, fps,
-                 progress):
     if duration_ui > 20.0: return 120
     if duration_ui > 13.0: return 90
     if duration_ui > 7.0: return 75
     if duration_ui > 5.0: return 60
-    else: return 45

 def use_last_frame_as_input(video_filepath):
     if not video_filepath or not os.path.exists(video_filepath):
         gr.Warning("No video available to get the last frame from.")
         return None, gr.update()
     try:
-        print(f"Extracting last frame from {video_filepath}")
         with imageio.get_reader(video_filepath) as reader:
-            last_frame_np = None
-            for frame in reader:
-                last_frame_np = frame
-            if last_frame_np is None:
-                raise ValueError("Could not read any frames from the video.")
         pil_image = Image.fromarray(last_frame_np)
-        temp_dir = tempfile.mkdtemp()
-        timestamp = random.randint(10000, 99999)
-        output_image_path = os.path.join(temp_dir, f"last_frame_{timestamp}.png")
         pil_image.save(output_image_path)
-        print(f"Saved last frame to {output_image_path}")
         return output_image_path, gr.update(selected="i2v_tab")
     except Exception as e:
-        print(f"Error extracting last frame: {e}")
         gr.Error(f"Failed to extract the last frame: {e}")
         return None, gr.update()

 @spaces.GPU(duration=get_duration)
-def generate(prompt, negative_prompt, input_image_filepath=None, input_video_filepath=None,
              height_ui=512, width_ui=704, mode="text-to-video",
              duration_ui=2.0, ui_frames_to_use=9,
              seed_ui=42, randomize_seed=True, ui_guidance_scale=3.0, improve_texture_flag=True, num_steps=20, fps=30.0,
              progress=gr.Progress(track_tqdm=True)):
     if mode == "image-to-video" and not input_image_filepath:
         raise gr.Error("input_image_filepath is required for image-to-video mode")
     elif mode == "video-to-video" and not input_video_filepath:
@@ -231,33 +244,25 @@ def generate(prompt, negative_prompt, input_image_filepath=None, input_video_fil
     if result_images_tensor is None:
         raise gr.Error("Generation failed.")
     pad_left, pad_right, pad_top, pad_bottom = padding_values
-    slice_h_end = -pad_bottom if pad_bottom > 0 else None
-    slice_w_end = -pad_right if pad_right > 0 else None
-    result_images_tensor = result_images_tensor[:, :, :actual_num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
     video_np = (np.clip(result_images_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy(), 0, 1) * 255).astype(np.uint8)
     output_video_path = os.path.join(tempfile.mkdtemp(), f"output_{random.randint(10000,99999)}.mp4")
-
-    # --- MODIFIED ---
-    # This block is restored to the original, correct version that loops through
-    # frames and uses `.append_data()` to save the video.
     try:
         with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], macro_block_size=1) as video_writer:
-            for frame_idx in range(video_np.shape[0]):
-                progress(frame_idx / video_np.shape[0], desc="Saving video")
-                video_writer.append_data(video_np[frame_idx])
     except Exception as e:
-        print(f"Error saving video with macro_block_size=1: {e}")
-        try:
-            with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], format='FFMPEG', codec='libx264', quality=10) as video_writer:
-                for frame_idx in range(video_np.shape[0]):
-                    progress(frame_idx / video_np.shape[0], desc="Saving video (fallback ffmpeg)")
-                    video_writer.append_data(video_np[frame_idx])
-        except Exception as e2:
-            print(f"Fallback video saving error: {e2}")
-            raise gr.Error(f"Failed to save video: {e2}")
-
-    return output_video_path, seed_ui, gr.update(visible=True)

 def update_task_image(): return "image-to-video"
 def update_task_text(): return "text-to-video"
 def update_task_video(): return "video-to-video"
@@ -265,34 +270,52 @@ def update_task_video(): return "video-to-video"
 css="""#col-container{margin:0 auto;max-width:900px;}"""

 with gr.Blocks(css=css) as demo:
     gr.Markdown("# LTX Video 0.9.8 13B Distilled")
-    gr.Markdown("Fast high quality video generation.**Update (17/07):** now with the new v0.9.8 for improved prompt understanding and detail generation" )
-    gr.Markdown("[Model](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltxv-13b-0.9.8-distilled.safetensors) [GitHub](https://github.com/Lightricks/LTX-Video) [Diffusers](https://huggingface.co/Lightricks/LTX-Video-0.9.8-13B-distilled#diffusers-🧨)")
     with gr.Row():
         with gr.Column():
             with gr.Tabs() as tabs:
                 with gr.Tab("image-to-video", id="i2v_tab") as image_tab:
                     video_i_hidden = gr.Textbox(visible=False)
                     image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam", "clipboard"])
                     i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
-                    i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
                 with gr.Tab("text-to-video", id="t2v_tab") as text_tab:
                     image_n_hidden = gr.Textbox(visible=False)
                     video_n_hidden = gr.Textbox(visible=False)
                     t2v_prompt = gr.Textbox(label="Prompt", value="A majestic dragon flying over a medieval castle", lines=3)
-                    t2v_button = gr.Button("Generate Text-to-Video", variant="primary")
                 with gr.Tab("video-to-video", id="v2v_tab") as video_tab:
                     image_v_hidden = gr.Textbox(visible=False)
                     video_v2v = gr.Video(label="Input Video", sources=["upload", "webcam"])
                     frames_to_use = gr.Slider(label="Frames to use from input video", minimum=9, maximum=120, value=9, step=8, info="Must be N*8+1.")
                     v2v_prompt = gr.Textbox(label="Prompt", value="Change the style to cinematic anime", lines=3)
-                    v2v_button = gr.Button("Generate Video-to-Video", variant="primary")
-            duration_input = gr.Slider(label="Video Duration (seconds)", minimum=3.0, maximum=60.0, value=5.0, step=0.1)
             improve_texture = gr.Checkbox(label="Improve Texture (multi-scale)", value=True)
         with gr.Column():
-            output_video = gr.Video(label="Generated Video", interactive=False)
             use_last_frame_button = gr.Button("Use Last Frame as Input Image", visible=False)
             with gr.Accordion("Advanced settings", open=False):
                 mode = gr.Dropdown(["text-to-video", "image-to-video", "video-to-video"], label="task", value="image-to-video", visible=False)
                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value="worst quality, inconsistent motion, blurry, jittery, distorted", lines=2)
                 with gr.Row():
@@ -305,6 +328,8 @@ with gr.Blocks(css=css) as demo:
                     width_input = gr.Slider(label="Width", value=768, step=32, minimum=32, maximum=MAX_IMAGE_SIZE)
                     num_steps = gr.Slider(label="Steps", value=20, step=1, minimum=1, maximum=420)
                     fps = gr.Slider(label="FPS", value=30.0, step=1.0, minimum=4.0, maximum=60.0)
     def handle_image_upload_for_dims(f, h, w):
         if not f: return gr.update(value=h), gr.update(value=w)
         img = Image.open(f)
@@ -322,18 +347,31 @@ with gr.Blocks(css=css) as demo:
     image_tab.select(update_task_image, outputs=[mode])
     text_tab.select(update_task_text, outputs=[mode])
     video_tab.select(update_task_video, outputs=[mode])
-    t2v_inputs = [t2v_prompt, negative_prompt_input, image_n_hidden, video_n_hidden, height_input, width_input, mode, duration_input, frames_to_use, seed_input, randomize_seed_input, guidance_scale_input, improve_texture, num_steps, fps]
-    i2v_inputs = [i2v_prompt, negative_prompt_input, image_i2v, video_i_hidden, height_input, width_input, mode, duration_input, frames_to_use, seed_input, randomize_seed_input, guidance_scale_input, improve_texture, num_steps, fps]
-    v2v_inputs = [v2v_prompt, negative_prompt_input, image_v_hidden, video_v2v, height_input, width_input, mode, duration_input, frames_to_use, seed_input, randomize_seed_input, guidance_scale_input, improve_texture, num_steps, fps]
     hide_btn = lambda: gr.update(visible=False)
-    t2v_button.click(hide_btn, outputs=[use_last_frame_button], queue=False).then(fn=generate, inputs=t2v_inputs, outputs=[output_video, seed_input, use_last_frame_button], api_name="text_to_video")
-    i2v_button.click(hide_btn, outputs=[use_last_frame_button], queue=False).then(fn=generate, inputs=i2v_inputs, outputs=[output_video, seed_input, use_last_frame_button], api_name="image_to_video")
-    v2v_button.click(hide_btn, outputs=[use_last_frame_button], queue=False).then(fn=generate, inputs=v2v_inputs, outputs=[output_video, seed_input, use_last_frame_button], api_name="video_to_video")
-    use_last_frame_button.click(
-        fn=use_last_frame_as_input,
-        inputs=[output_video],
-        outputs=[image_i2v, tabs]
-    )

 if __name__ == "__main__":
     if os.path.exists(models_dir): print(f"Model directory: {Path(models_dir).resolve()}")
 
+# --- NEW ---
+# Add moviepy for video stitching. Make sure to install it: pip install moviepy
+from moviepy.editor import VideoFileClip, concatenate_videoclips
+
 import spaces
 import os
 os.putenv('PYTORCH_NVML_BASED_CUDA_CHECK','1')
 
     latent_upsampler_instance.to(target_inference_device)


+# --- Helper functions ---
 def calculate_new_dimensions(orig_w, orig_h):
     if orig_w == 0 or orig_h == 0:
+        return int(768), int(768)
     if orig_w >= orig_h:
+        new_h = 768
+        new_w = round((new_h * (orig_w / orig_h)) / 32) * 32
     else:
+        new_w = 768
+        new_h = round((new_w * (orig_h / orig_w)) / 32) * 32
+    return int(max(256, min(new_h, MAX_IMAGE_SIZE))), int(max(256, min(new_w, MAX_IMAGE_SIZE)))
+
+def get_duration(*args, **kwargs):  # Simplified for brevity
+    duration_ui = kwargs.get('duration_ui', 5.0)
     if duration_ui > 20.0: return 120
     if duration_ui > 13.0: return 90
     if duration_ui > 7.0: return 75
     if duration_ui > 5.0: return 60
+    return 45
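For orientation, here is a small worked example of the rounding done by the new `calculate_new_dimensions` (an illustration added for this write-up, not part of the commit; it assumes `MAX_IMAGE_SIZE`, defined elsewhere in app.py, is large enough that no clamping occurs):

```python
# A 1920x1080 landscape input: the short side is pinned to 768 and the long
# side is scaled by the aspect ratio, then rounded to the nearest multiple of 32.
orig_w, orig_h = 1920, 1080
new_h = 768
new_w = round((new_h * (orig_w / orig_h)) / 32) * 32  # 1365.33 / 32 = 42.67 -> 43 * 32 = 1376
assert (new_h, new_w) == (768, 1376)
```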
 
 def use_last_frame_as_input(video_filepath):
     if not video_filepath or not os.path.exists(video_filepath):
         gr.Warning("No video available to get the last frame from.")
         return None, gr.update()
     try:
         with imageio.get_reader(video_filepath) as reader:
+            last_frame_np = next(reversed(list(reader)))
         pil_image = Image.fromarray(last_frame_np)
+        output_image_path = os.path.join(tempfile.mkdtemp(), f"last_frame_{random.randint(10000,99999)}.png")
         pil_image.save(output_image_path)
         return output_image_path, gr.update(selected="i2v_tab")
     except Exception as e:
         gr.Error(f"Failed to extract the last frame: {e}")
         return None, gr.update()

+# --- NEW ---
+# Function to stitch video clips together using moviepy
+def stitch_videos(clips_list):
+    if not clips_list or len(clips_list) < 2:
+        raise gr.Error("You need at least two clips to stitch them together!")
+
+    print(f"Stitching {len(clips_list)} clips...")
+    try:
+        video_clips = [VideoFileClip(clip_path) for clip_path in clips_list]
+        final_clip = concatenate_videoclips(video_clips, method="compose")
+
+        final_output_path = os.path.join(tempfile.mkdtemp(), f"stitched_video_{random.randint(10000,99999)}.mp4")
+        final_clip.write_videofile(final_output_path, codec="libx264", audio=False, threads=4, preset='ultrafast')
+
+        # Close all clip objects to release file handles
+        for clip in video_clips:
+            clip.close()
+
+        print(f"Final video saved to {final_output_path}")
+        return final_output_path
+    except Exception as e:
+        print(f"Error during video stitching: {e}")
+        raise gr.Error(f"Failed to stitch videos: {e}")
+
+# --- NEW ---
+# Function to clear the clip list and reset the UI
+def clear_clips():
+    return [], "Clips created: 0", None, None
+
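For readers unfamiliar with the moviepy calls used in `stitch_videos`, this is a minimal standalone sketch of the same concatenation pattern (the clip paths are hypothetical and only illustrate the API; moviepy 1.x exposes these names under `moviepy.editor`):

```python
from moviepy.editor import VideoFileClip, concatenate_videoclips

paths = ["clip_a.mp4", "clip_b.mp4"]  # hypothetical input clips

clips = [VideoFileClip(p) for p in paths]                # open each clip
final = concatenate_videoclips(clips, method="compose")  # "compose" pads clips of differing sizes
final.write_videofile("stitched.mp4", codec="libx264", audio=False)

for c in clips:  # release the underlying file handles
    c.close()
final.close()
```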
 @spaces.GPU(duration=get_duration)
+def generate(prompt, negative_prompt, clips_list,  # --- MODIFIED --- added clips_list
+             input_image_filepath=None, input_video_filepath=None,
              height_ui=512, width_ui=704, mode="text-to-video",
              duration_ui=2.0, ui_frames_to_use=9,
              seed_ui=42, randomize_seed=True, ui_guidance_scale=3.0, improve_texture_flag=True, num_steps=20, fps=30.0,
              progress=gr.Progress(track_tqdm=True)):
+
+    # ... (most of the generate function logic is unchanged) ...
     if mode == "image-to-video" and not input_image_filepath:
         raise gr.Error("input_image_filepath is required for image-to-video mode")
     elif mode == "video-to-video" and not input_video_filepath:
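As context for the decorator above: passing a callable to `spaces.GPU(duration=...)` is the ZeroGPU dynamic-duration pattern, where the callable is evaluated per request to decide how many seconds of GPU time to reserve. A minimal sketch of that pattern, under the assumption (suggested by the code here) that the callable receives the same arguments as the decorated function; `my_duration` and `my_generate` are illustrative names only:

```python
import spaces

def my_duration(*args, **kwargs):
    # Assumed behavior: called with the same arguments as the GPU function,
    # returns the number of seconds of ZeroGPU time to reserve for this call.
    duration_ui = kwargs.get("duration_ui", 5.0)
    return 90 if duration_ui > 10.0 else 45

@spaces.GPU(duration=my_duration)
def my_generate(prompt, duration_ui=5.0):
    ...  # the actual GPU work runs inside the reserved window
```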
 
244
  if result_images_tensor is None:
245
  raise gr.Error("Generation failed.")
246
  pad_left, pad_right, pad_top, pad_bottom = padding_values
247
+ result_images_tensor = result_images_tensor[:, :, :actual_num_frames, pad_top:(-pad_bottom or None), pad_left:(-pad_right or None)]
 
 
248
  video_np = (np.clip(result_images_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy(), 0, 1) * 255).astype(np.uint8)
249
  output_video_path = os.path.join(tempfile.mkdtemp(), f"output_{random.randint(10000,99999)}.mp4")
 
 
 
 
250
  try:
251
  with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], macro_block_size=1) as video_writer:
252
+ for frame_idx, frame in enumerate(video_np):
253
+ progress(frame_idx / len(video_np), desc="Saving video")
254
+ video_writer.append_data(frame)
255
  except Exception as e:
256
+ gr.Error(f"Failed to save video: {e}")
257
+
258
+ # --- MODIFIED ---
259
+ # Append the new clip to the list and prepare the updated state and counter text
260
+ updated_clips_list = clips_list + [output_video_path]
261
+ counter_text = f"Clips created: {len(updated_clips_list)}"
262
+
263
+ return output_video_path, seed_ui, gr.update(visible=True), updated_clips_list, counter_text
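A quick note on the `(-pad_bottom or None)` slice bound introduced above: when a padding value is 0, the expression `-0` is falsy, so it falls back to `None` and the slice keeps the full extent instead of producing an empty `[start:0]` slice. A tiny self-contained check of that idiom:

```python
import numpy as np

a = np.arange(10)

pad = 0
assert a[0:(-pad or None)].size == 10  # -0 is falsy, so the end bound becomes None
pad = 3
assert a[0:(-pad or None)].size == 7   # ordinary negative-index trim
```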
 
 
 
+# ... (update_task functions are unchanged) ...
 def update_task_image(): return "image-to-video"
 def update_task_text(): return "text-to-video"
 def update_task_video(): return "video-to-video"

 css="""#col-container{margin:0 auto;max-width:900px;}"""

 with gr.Blocks(css=css) as demo:
+    # --- NEW ---
+    # Add a state component to store the list of clip paths
+    clips_state = gr.State([])
+
     gr.Markdown("# LTX Video 0.9.8 13B Distilled")
+    gr.Markdown("Generate short video clips and stitch them together to create a longer animation.")
+
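For context (an aside, not part of the commit): `gr.State` holds a per-session Python value that is passed into an event handler like any other input and replaced by whatever the handler returns for it, which is how the growing list of clip paths survives between button clicks here. A minimal sketch of that pattern, with illustrative names only:

```python
import gradio as gr

def add_item(items, new_item):
    items = items + [new_item]  # return a new list to update the stored state
    return items, f"Items: {len(items)}"

with gr.Blocks() as demo:
    items_state = gr.State([])  # per-session list, not rendered in the UI
    new_item_box = gr.Textbox(label="New item")
    counter = gr.Markdown("Items: 0")
    gr.Button("Add").click(add_item, inputs=[items_state, new_item_box], outputs=[items_state, counter])

demo.launch()
```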
     with gr.Row():
         with gr.Column():
             with gr.Tabs() as tabs:
                 with gr.Tab("image-to-video", id="i2v_tab") as image_tab:
+                    # ... (tab content is unchanged) ...
                     video_i_hidden = gr.Textbox(visible=False)
                     image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam", "clipboard"])
                     i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
+                    i2v_button = gr.Button("Generate Image-to-Video Clip", variant="primary")
                 with gr.Tab("text-to-video", id="t2v_tab") as text_tab:
                     image_n_hidden = gr.Textbox(visible=False)
                     video_n_hidden = gr.Textbox(visible=False)
                     t2v_prompt = gr.Textbox(label="Prompt", value="A majestic dragon flying over a medieval castle", lines=3)
+                    t2v_button = gr.Button("Generate Text-to-Video Clip", variant="primary")
                 with gr.Tab("video-to-video", id="v2v_tab") as video_tab:
                     image_v_hidden = gr.Textbox(visible=False)
                     video_v2v = gr.Video(label="Input Video", sources=["upload", "webcam"])
                     frames_to_use = gr.Slider(label="Frames to use from input video", minimum=9, maximum=120, value=9, step=8, info="Must be N*8+1.")
                     v2v_prompt = gr.Textbox(label="Prompt", value="Change the style to cinematic anime", lines=3)
+                    v2v_button = gr.Button("Generate Video-to-Video Clip", variant="primary")
+
+            duration_input = gr.Slider(label="Clip Duration (seconds)", minimum=1.0, maximum=10.0, value=2.0, step=0.1)
             improve_texture = gr.Checkbox(label="Improve Texture (multi-scale)", value=True)
+
         with gr.Column():
+            output_video = gr.Video(label="Last Generated Clip", interactive=False)
             use_last_frame_button = gr.Button("Use Last Frame as Input Image", visible=False)
+
+            # --- NEW ---
+            # Add UI components for stitching
+            with gr.Accordion("Stitching Controls", open=True):
+                clip_counter_display = gr.Markdown("Clips created: 0")
+                with gr.Row():
+                    stitch_button = gr.Button("🎬 Stitch All Clips")
+                    clear_button = gr.Button("🗑️ Clear All Clips")
+                final_video_output = gr.Video(label="Final Stitched Video", interactive=False)
+
             with gr.Accordion("Advanced settings", open=False):
+                # ... (advanced settings are unchanged) ...
                 mode = gr.Dropdown(["text-to-video", "image-to-video", "video-to-video"], label="task", value="image-to-video", visible=False)
                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value="worst quality, inconsistent motion, blurry, jittery, distorted", lines=2)
                 with gr.Row():

                     width_input = gr.Slider(label="Width", value=768, step=32, minimum=32, maximum=MAX_IMAGE_SIZE)
                     num_steps = gr.Slider(label="Steps", value=20, step=1, minimum=1, maximum=420)
                     fps = gr.Slider(label="FPS", value=30.0, step=1.0, minimum=4.0, maximum=60.0)
+
+    # ... (event handlers for uploads and tab changes are unchanged) ...
     def handle_image_upload_for_dims(f, h, w):
         if not f: return gr.update(value=h), gr.update(value=w)
         img = Image.open(f)
347
  image_tab.select(update_task_image, outputs=[mode])
348
  text_tab.select(update_task_text, outputs=[mode])
349
  video_tab.select(update_task_video, outputs=[mode])
350
+
351
+ # --- MODIFIED ---
352
+ # The inputs and outputs for the generate buttons now include the clips_state and clip_counter_display
353
+ base_inputs = [negative_prompt_input, clips_state,
354
+ height_input, width_input, mode, duration_input, frames_to_use,
355
+ seed_input, randomize_seed_input, guidance_scale_input, improve_texture, num_steps, fps]
356
+
357
+ t2v_inputs = [t2v_prompt] + base_inputs + [image_n_hidden, video_n_hidden]
358
+ i2v_inputs = [i2v_prompt] + base_inputs + [image_i2v, video_i_hidden]
359
+ v2v_inputs = [v2v_prompt] + base_inputs + [image_v_hidden, video_v2v]
360
+
361
+ gen_outputs = [output_video, seed_input, use_last_frame_button, clips_state, clip_counter_display]
362
+
363
  hide_btn = lambda: gr.update(visible=False)
364
+ t2v_button.click(hide_btn, outputs=[use_last_frame_button], queue=False).then(fn=generate, inputs=t2v_inputs, outputs=gen_outputs, api_name="text_to_video")
365
+ i2v_button.click(hide_btn, outputs=[use_last_frame_button], queue=False).then(fn=generate, inputs=i2v_inputs, outputs=gen_outputs, api_name="image_to_video")
366
+ v2v_button.click(hide_btn, outputs=[use_last_frame_button], queue=False).then(fn=generate, inputs=v2v_inputs, outputs=gen_outputs, api_name="video_to_video")
367
+
368
+ use_last_frame_button.click(fn=use_last_frame_as_input, inputs=[output_video], outputs=[image_i2v, tabs])
369
+
370
+ # --- NEW ---
371
+ # Add event handlers for the new stitching and clearing buttons
372
+ stitch_button.click(fn=stitch_videos, inputs=[clips_state], outputs=[final_video_output])
373
+ clear_button.click(fn=clear_clips, outputs=[clips_state, clip_counter_display, output_video, final_video_output])
374
+
375
 
 if __name__ == "__main__":
     if os.path.exists(models_dir): print(f"Model directory: {Path(models_dir).resolve()}")