Update app.py
app.py CHANGED
@@ -1,7 +1,4 @@
-# --- NEW ---
-# Add moviepy for video stitching. Make sure to install it: pip install moviepy
 from moviepy.editor import VideoFileClip, concatenate_videoclips
-
 import spaces
 import os
 os.putenv('PYTORCH_NVML_BASED_CUDA_CHECK','1')
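Note (not part of the commit): the import above assumes moviepy 1.x, where the moviepy.editor module still exists; moviepy 2.x dropped that module. A hedged fallback, useful if the Space does not pin the moviepy version in requirements.txt:

try:
    from moviepy.editor import VideoFileClip, concatenate_videoclips   # moviepy 1.x layout
except ImportError:
    from moviepy import VideoFileClip, concatenate_videoclips          # moviepy 2.x layout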
@@ -56,7 +53,6 @@ LTX_REPO = "Lightricks/LTX-Video"
 MAX_IMAGE_SIZE = PIPELINE_CONFIG_YAML.get("max_resolution", 1280)
 MAX_NUM_FRAMES = 900
 
-# --- Global variables for loaded models ---
 pipeline_instance = None
 latent_upsampler_instance = None
 models_dir = "downloaded_models_gradio_cpu_init"
@@ -110,8 +106,6 @@ pipeline_instance.to(target_inference_device)
 if latent_upsampler_instance:
     latent_upsampler_instance.to(target_inference_device)
 
-
-# --- Helper functions ---
 def calculate_new_dimensions(orig_w, orig_h):
     if orig_w == 0 or orig_h == 0:
         return int(768), int(768)
@@ -123,7 +117,7 @@ def calculate_new_dimensions(orig_w, orig_h):
     new_h = round((new_w * (orig_h / orig_w)) / 32) * 32
     return int(max(256, min(new_h, MAX_IMAGE_SIZE))), int(max(256, min(new_w, MAX_IMAGE_SIZE)))
 
-def get_duration(*args, **kwargs):
+def get_duration(*args, **kwargs):
     duration_ui = kwargs.get('duration_ui', 5.0)
     if duration_ui > 20.0: return 120
     if duration_ui > 13.0: return 90
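get_duration is the dynamic duration callback handed to @spaces.GPU further down, and it appears to map the requested clip length to a GPU time budget in seconds. Calling it directly illustrates the two thresholds visible in this hunk (the remaining branches are elided from the diff):

print(get_duration(duration_ui=25.0))   # 120 -- largest budget for the longest clips
print(get_duration(duration_ui=15.0))   # 90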
@@ -137,7 +131,12 @@ def use_last_frame_as_input(video_filepath):
         return None, gr.update()
     try:
         with imageio.get_reader(video_filepath) as reader:
-
+            # A robust way to get the last frame
+            frame_list = list(reader)
+            if not frame_list:
+                raise ValueError("Video file is empty or unreadable.")
+            last_frame_np = frame_list[-1]
+
         pil_image = Image.fromarray(last_frame_np)
         output_image_path = os.path.join(tempfile.mkdtemp(), f"last_frame_{random.randint(10000,99999)}.png")
         pil_image.save(output_image_path)
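The added code above collects every decoded frame with list(reader) before taking the last one, which is simple but holds the whole clip in memory. A hedged alternative sketch (not part of this commit) that streams frames and keeps only the most recent one:

import imageio

def read_last_frame(video_filepath):
    # Iterate the reader one frame at a time instead of materialising the whole clip.
    last_frame_np = None
    with imageio.get_reader(video_filepath) as reader:
        for frame in reader:
            last_frame_np = frame
    if last_frame_np is None:
        raise ValueError("Video file is empty or unreadable.")
    return last_frame_np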
@@ -146,123 +145,105 @@ def use_last_frame_as_input(video_filepath):
         gr.Error(f"Failed to extract the last frame: {e}")
         return None, gr.update()
 
-# --- NEW ---
-# Function to stitch video clips together using moviepy
 def stitch_videos(clips_list):
     if not clips_list or len(clips_list) < 2:
         raise gr.Error("You need at least two clips to stitch them together!")
-
     print(f"Stitching {len(clips_list)} clips...")
     try:
         video_clips = [VideoFileClip(clip_path) for clip_path in clips_list]
         final_clip = concatenate_videoclips(video_clips, method="compose")
-
         final_output_path = os.path.join(tempfile.mkdtemp(), f"stitched_video_{random.randint(10000,99999)}.mp4")
         final_clip.write_videofile(final_output_path, codec="libx264", audio=False, threads=4, preset='ultrafast')
-
-        # Close all clip objects to release file handles
         for clip in video_clips:
             clip.close()
-
         print(f"Final video saved to {final_output_path}")
         return final_output_path
     except Exception as e:
         print(f"Error during video stitching: {e}")
         raise gr.Error(f"Failed to stitch videos: {e}")
 
-# --- NEW ---
-# Function to clear the clip list and reset the UI
 def clear_clips():
     return [], "Clips created: 0", None, None
 
+# --- MODIFIED ---
+# The function signature has been reordered to match the way the inputs are assembled later.
 @spaces.GPU(duration=get_duration)
-def generate(prompt, negative_prompt, clips_list,
-
-
-             duration_ui=2.0, ui_frames_to_use=9,
-             seed_ui=42, randomize_seed=True, ui_guidance_scale=3.0, improve_texture_flag=True, num_steps=20, fps=30.0,
+def generate(prompt, negative_prompt, clips_list, input_image_filepath, input_video_filepath,
+             height_ui, width_ui, mode, duration_ui, ui_frames_to_use,
+             seed_ui, randomize_seed, ui_guidance_scale, improve_texture_flag, num_steps, fps,
              progress=gr.Progress(track_tqdm=True)):
-
-
+
+    if mode not in ["text-to-video", "image-to-video", "video-to-video"]:
+        raise gr.Error(f"Invalid mode: {mode}.")
     if mode == "image-to-video" and not input_image_filepath:
         raise gr.Error("input_image_filepath is required for image-to-video mode")
     elif mode == "video-to-video" and not input_video_filepath:
         raise gr.Error("input_video_filepath is required for video-to-video mode")
-
-        raise gr.Error(f"Invalid mode: {mode}.")
+
     if randomize_seed:
         seed_ui = random.randint(0, 2**32 - 1)
     seed_everething(int(seed_ui))
+
     target_frames_ideal = duration_ui * fps
     target_frames_rounded = max(1, round(target_frames_ideal))
     n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
     actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
-    actual_height = int(height_ui)
-    actual_width = int(width_ui)
+    actual_height, actual_width = int(height_ui), int(width_ui)
     height_padded = ((actual_height - 1) // 32 + 1) * 32
     width_padded = ((actual_width - 1) // 32 + 1) * 32
-    num_frames_padded = ((actual_num_frames - 2) // 8 + 1) * 8 + 1
     padding_values = calculate_padding(actual_height, actual_width, height_padded, width_padded)
+
     call_kwargs = {
         "prompt": prompt, "negative_prompt": negative_prompt, "height": height_padded, "width": width_padded,
-        "num_frames":
+        "num_frames": max(9, ((actual_num_frames - 2) // 8 + 1) * 8 + 1), "num_inference_steps": num_steps, "frame_rate": int(fps),
         "generator": torch.Generator(device=target_inference_device).manual_seed(int(seed_ui)), "output_type": "pt",
-
-        "
-        "
-        "offload_to_cpu": False, "enhance_prompt": False, "conditioning_items": None, "media_items": None
+        **PIPELINE_CONFIG_YAML,
+        "is_video": True, "mixed_precision": (PIPELINE_CONFIG_YAML["precision"] == "mixed_precision"),
+        "offload_to_cpu": False, "enhance_prompt": False,
     }
-
+
+    # This logic can be simplified and organized
+    stg_mode_str = PIPELINE_CONFIG_YAML.get("stg_mode", "attention_values").lower()
     stg_map = {
         "stg_av": SkipLayerStrategy.AttentionValues, "attention_values": SkipLayerStrategy.AttentionValues,
         "stg_as": SkipLayerStrategy.AttentionSkip, "attention_skip": SkipLayerStrategy.AttentionSkip,
         "stg_r": SkipLayerStrategy.Residual, "residual": SkipLayerStrategy.Residual,
         "stg_t": SkipLayerStrategy.TransformerBlock, "transformer_block": SkipLayerStrategy.TransformerBlock,
     }
-    call_kwargs["skip_layer_strategy"] = stg_map.get(stg_mode_str.
-
-        raise ValueError(f"Invalid stg_mode: {stg_mode_str}")
+    call_kwargs["skip_layer_strategy"] = stg_map.get(stg_mode_str, SkipLayerStrategy.AttentionValues)
+
     if mode == "image-to-video":
         media_tensor = load_image_to_tensor_with_resize_and_crop(input_image_filepath, actual_height, actual_width)
-
-        call_kwargs["conditioning_items"] = [ConditioningItem(media_tensor.to(target_inference_device), 0, 1.0)]
+        call_kwargs["conditioning_items"] = [ConditioningItem(torch.nn.functional.pad(media_tensor, padding_values).to(target_inference_device), 0, 1.0)]
     elif mode == "video-to-video":
         call_kwargs["media_items"] = load_media_file(media_path=input_video_filepath, height=actual_height, width=actual_width, max_frames=int(ui_frames_to_use), padding=padding_values).to(target_inference_device)
-
-    if improve_texture_flag:
-
-
-
-
-        second_pass_args = {**PIPELINE_CONFIG_YAML.get("second_pass", {}), "guidance_scale": float(ui_guidance_scale)}
-        multi_scale_call_kwargs = {**call_kwargs, "downscale_factor": PIPELINE_CONFIG_YAML["downscale_factor"], "first_pass": first_pass_args, "second_pass": second_pass_args}
-        result_images_tensor = multi_scale_pipeline_obj(**multi_scale_call_kwargs).images
+
+    if improve_texture_flag and latent_upsampler_instance:
+        multi_scale_pipeline = LTXMultiScalePipeline(pipeline_instance, latent_upsampler_instance)
+        pass_args = {"guidance_scale": float(ui_guidance_scale)}
+        multi_scale_kwargs = {**call_kwargs, "first_pass": {**PIPELINE_CONFIG_YAML.get("first_pass", {}), **pass_args}, "second_pass": {**PIPELINE_CONFIG_YAML.get("second_pass", {}), **pass_args}}
+        result_images_tensor = multi_scale_pipeline(**multi_scale_kwargs).images
     else:
-
-
-
+        single_pass_kwargs = {**call_kwargs, "guidance_scale": float(ui_guidance_scale), **PIPELINE_CONFIG_YAML.get("first_pass", {})}
+        result_images_tensor = pipeline_instance(**single_pass_kwargs).images
+
     if result_images_tensor is None:
         raise gr.Error("Generation failed.")
-
-
+
+    pad_l, pad_r, pad_t, pad_b = padding_values
+    result_images_tensor = result_images_tensor[:, :, :actual_num_frames, pad_t:(-pad_b or None), pad_l:(-pad_r or None)]
     video_np = (np.clip(result_images_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy(), 0, 1) * 255).astype(np.uint8)
+
     output_video_path = os.path.join(tempfile.mkdtemp(), f"output_{random.randint(10000,99999)}.mp4")
-
-
-
-
-            video_writer.append_data(frame)
-    except Exception as e:
-        gr.Error(f"Failed to save video: {e}")
+    with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], macro_block_size=1) as video_writer:
+        for idx, frame in enumerate(video_np):
+            progress(idx / len(video_np), desc="Saving video clip...")
+            video_writer.append_data(frame)
 
-    # --- MODIFIED ---
-    # Append the new clip to the list and prepare the updated state and counter text
     updated_clips_list = clips_list + [output_video_path]
     counter_text = f"Clips created: {len(updated_clips_list)}"
-
     return output_video_path, seed_ui, gr.update(visible=True), updated_clips_list, counter_text
 
-# ... (update_task functions are unchanged) ...
 def update_task_image(): return "image-to-video"
 def update_task_text(): return "text-to-video"
 def update_task_video(): return "video-to-video"
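Two details of the rewritten generate() are worth spelling out. First, the new "num_frames" entry keeps the N*8+1 frame count the pipeline expects; a worked example with the defaults shown in this diff (duration_ui=2.0, fps=30.0):

duration_ui, fps = 2.0, 30.0
target_frames_rounded = max(1, round(duration_ui * fps))           # 60
n_val = round((float(target_frames_rounded) - 1.0) / 8.0)          # round(7.375) -> 7
actual_num_frames = max(9, min(900, int(n_val * 8 + 1)))           # 57 = 7*8 + 1
num_frames = max(9, ((actual_num_frames - 2) // 8 + 1) * 8 + 1)    # 57, already of the form N*8 + 1

Second, the crop after generation uses "-pad or None" so that a padding of zero keeps the full extent instead of slicing everything away:

pad_b = 8
rows = slice(0, -pad_b or None)   # slice(0, -8): drop the 8 padded rows
pad_b = 0
rows = slice(0, -pad_b or None)   # slice(0, None): keep every row when nothing was padded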
@@ -270,18 +251,13 @@ def update_task_video(): return "video-to-video"
 css="""#col-container{margin:0 auto;max-width:900px;}"""
 
 with gr.Blocks(css=css) as demo:
-    # --- NEW ---
-    # Add a state component to store the list of clip paths
     clips_state = gr.State([])
-
-    gr.Markdown("# LTX Video 0.9.8 13B Distilled")
+    gr.Markdown("# LTX Video Clip Stitcher")
     gr.Markdown("Generate short video clips and stitch them together to create a longer animation.")
-
     with gr.Row():
         with gr.Column():
             with gr.Tabs() as tabs:
                 with gr.Tab("image-to-video", id="i2v_tab") as image_tab:
-                    # ... (tab content is unchanged) ...
                     video_i_hidden = gr.Textbox(visible=False)
                     image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam", "clipboard"])
                     i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
@@ -297,25 +273,18 @@ with gr.Blocks(css=css) as demo:
                     frames_to_use = gr.Slider(label="Frames to use from input video", minimum=9, maximum=120, value=9, step=8, info="Must be N*8+1.")
                     v2v_prompt = gr.Textbox(label="Prompt", value="Change the style to cinematic anime", lines=3)
                     v2v_button = gr.Button("Generate Video-to-Video Clip", variant="primary")
-
             duration_input = gr.Slider(label="Clip Duration (seconds)", minimum=1.0, maximum=10.0, value=2.0, step=0.1)
             improve_texture = gr.Checkbox(label="Improve Texture (multi-scale)", value=True)
-
         with gr.Column():
             output_video = gr.Video(label="Last Generated Clip", interactive=False)
             use_last_frame_button = gr.Button("Use Last Frame as Input Image", visible=False)
-
-            # --- NEW ---
-            # Add UI components for stitching
             with gr.Accordion("Stitching Controls", open=True):
                 clip_counter_display = gr.Markdown("Clips created: 0")
                 with gr.Row():
                     stitch_button = gr.Button("🎬 Stitch All Clips")
                     clear_button = gr.Button("🗑️ Clear All Clips")
                 final_video_output = gr.Video(label="Final Stitched Video", interactive=False)
-
             with gr.Accordion("Advanced settings", open=False):
-                # ... (advanced settings are unchanged) ...
                 mode = gr.Dropdown(["text-to-video", "image-to-video", "video-to-video"], label="task", value="image-to-video", visible=False)
                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value="worst quality, inconsistent motion, blurry, jittery, distorted", lines=2)
                 with gr.Row():
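For reference, a hedged sketch (with hypothetical file names) of what the "🎬 Stitch All Clips" button ultimately does through stitch_videos and moviepy:

from moviepy.editor import VideoFileClip, concatenate_videoclips

clip_paths = ["clip_0001.mp4", "clip_0002.mp4"]           # hypothetical outputs of generate()
clips = [VideoFileClip(p) for p in clip_paths]
final = concatenate_videoclips(clips, method="compose")   # same settings stitch_videos uses above
final.write_videofile("stitched.mp4", codec="libx264", audio=False)
for c in clips:
    c.close()                                             # release file handles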
@@ -329,7 +298,6 @@ with gr.Blocks(css=css) as demo:
                     num_steps = gr.Slider(label="Steps", value=20, step=1, minimum=1, maximum=420)
                     fps = gr.Slider(label="FPS", value=30.0, step=1.0, minimum=4.0, maximum=60.0)
 
-    # ... (event handlers for uploads and tab changes are unchanged) ...
     def handle_image_upload_for_dims(f, h, w):
         if not f: return gr.update(value=h), gr.update(value=w)
         img = Image.open(f)
@@ -342,6 +310,7 @@ with gr.Blocks(css=css) as demo:
             orig_w, orig_h = meta.get('size', (reader.get_data(0).shape[1], reader.get_data(0).shape[0]))
             new_h, new_w = calculate_new_dimensions(orig_w, orig_h)
             return gr.update(value=new_h), gr.update(value=new_w)
+
     image_i2v.upload(handle_image_upload_for_dims, [image_i2v, height_input, width_input], [height_input, width_input])
     video_v2v.upload(handle_video_upload_for_dims, [video_v2v, height_input, width_input], [height_input, width_input])
     image_tab.select(update_task_image, outputs=[mode])
@@ -349,14 +318,14 @@ with gr.Blocks(css=css) as demo:
     video_tab.select(update_task_video, outputs=[mode])
 
     # --- MODIFIED ---
-    # The
-
-
-
-
-    t2v_inputs = [t2v_prompt
-    i2v_inputs = [i2v_prompt
-    v2v_inputs = [v2v_prompt
+    # The input lists are now defined explicitly and in the correct order to match the `generate` function signature.
+    common_params = [
+        height_input, width_input, mode, duration_input, frames_to_use,
+        seed_input, randomize_seed_input, guidance_scale_input, improve_texture, num_steps, fps
+    ]
+    t2v_inputs = [t2v_prompt, negative_prompt_input, clips_state, image_n_hidden, video_n_hidden] + common_params
+    i2v_inputs = [i2v_prompt, negative_prompt_input, clips_state, image_i2v, video_i_hidden] + common_params
+    v2v_inputs = [v2v_prompt, negative_prompt_input, clips_state, image_v_hidden, video_v2v] + common_params
 
     gen_outputs = [output_video, seed_input, use_last_frame_button, clips_state, clip_counter_display]
 
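The comment above states the key invariant: Gradio passes each inputs list to generate() positionally, so the order must match the new 16-parameter signature exactly (progress is injected by Gradio itself). A small sanity check, assuming the component variables referenced here exist elsewhere in app.py:

# 5 per-mode components + 11 shared ones = 16 values, lining up with
# generate(prompt, negative_prompt, clips_list, input_image_filepath, input_video_filepath,
#          height_ui, width_ui, mode, duration_ui, ui_frames_to_use,
#          seed_ui, randomize_seed, ui_guidance_scale, improve_texture_flag, num_steps, fps)
assert len(i2v_inputs) == 5 + len(common_params) == 16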
@@ -366,13 +335,9 @@ with gr.Blocks(css=css) as demo:
     v2v_button.click(hide_btn, outputs=[use_last_frame_button], queue=False).then(fn=generate, inputs=v2v_inputs, outputs=gen_outputs, api_name="video_to_video")
 
     use_last_frame_button.click(fn=use_last_frame_as_input, inputs=[output_video], outputs=[image_i2v, tabs])
-
-    # --- NEW ---
-    # Add event handlers for the new stitching and clearing buttons
     stitch_button.click(fn=stitch_videos, inputs=[clips_state], outputs=[final_video_output])
     clear_button.click(fn=clear_clips, outputs=[clips_state, clip_counter_display, output_video, final_video_output])
 
-
 if __name__ == "__main__":
     if os.path.exists(models_dir): print(f"Model directory: {Path(models_dir).resolve()}")
     demo.queue().launch(debug=True, share=False, mcp_server=True)
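The event wiring above is what makes the multi-clip workflow stateful: generate() receives clips_state, returns the extended list, and Gradio writes it back because clips_state also appears in gen_outputs. A minimal self-contained sketch of that round trip (a toy, not part of app.py):

import gradio as gr

def add_clip(clips):
    # Mirror generate(): read the current list, return the extended list plus the counter text.
    clips = clips + [f"clip_{len(clips)}.mp4"]
    return clips, f"Clips created: {len(clips)}"

with gr.Blocks() as toy:
    clips_state = gr.State([])
    counter = gr.Markdown("Clips created: 0")
    gr.Button("Add clip").click(add_clip, inputs=[clips_state], outputs=[clips_state, counter])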