metadata
license: other
language:
- en
library_name: diffusers
# Example: LTXConditionPipeline generation conditioned on an image (frame 0) and a video (frame 80)
import torch
from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXConditionPipeline, LTXVideoCondition
from diffusers.utils import export_to_video, load_video, load_image
# Device used for the pipeline and for the seeded generator. The default CUDA
# device is used rather than a fixed index such as "cuda:2", which raises on
# hosts with fewer than three GPUs. To target a specific card, restrict the
# visible devices with the CUDA_VISIBLE_DEVICES environment variable.
device = "cuda"
# dtype passed to `from_pretrained` as `torch_dtype`.
dtype = torch.bfloat16
# Repository id handed to `from_pretrained`.
repo = "YiYiXu/ltx-95"

# Initialize the pipeline and move it to the selected device.
pipe = LTXConditionPipeline.from_pretrained(repo, torch_dtype=dtype)
pipe.to(device)
# Locations of the two conditioning inputs (a video clip and a still image).
CONDITION_VIDEO_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cosmos/cosmos-video2world-input-vid.mp4"
CONDITION_IMAGE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cosmos/cosmos-video2world-input.jpg"

# Load both inputs through the diffusers loading helpers.
video = load_video(CONDITION_VIDEO_URL)
image = load_image(CONDITION_IMAGE_URL)

# The image is attached at frame index 0 and the video at frame index 80.
condition1 = LTXVideoCondition(image=image, frame_index=0)
condition2 = LTXVideoCondition(video=video, frame_index=80)
# Define prompts
# The positive prompt describes two consecutive scenes: a sunny highway, then a
# snowy mountain road. Adjacent string literals are concatenated into a single
# string; every piece except the last ends with exactly one space.
prompt = (
    "The video depicts a long, straight highway stretching into the distance, flanked by metal guardrails. "
    "The road is divided into multiple lanes, with a few vehicles visible in the far distance. "
    "The surrounding landscape features dry, grassy fields on one side and rolling hills on the other. "
    "The sky is mostly clear with a few scattered clouds, suggesting a bright, sunny day. "
    # "winding" was previously misspelled as "inding" in this sentence.
    "And then the camera switch to a winding mountain road covered in snow, with a single vehicle traveling along it. "
    "The road is flanked by steep, rocky cliffs and sparse vegetation. "
    "The landscape is characterized by rugged terrain and a river visible in the distance. "
    "The scene captures the solitude and beauty of a winter drive through a mountainous region."
)
# Traits passed to the pipeline as `negative_prompt`.
negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
# Generate the video
# Generator created for `device` and seeded with a fixed value (0).
generator = torch.Generator(device=device)
generator.manual_seed(0)

# Run the pipeline with both conditions and the prompt pair.
result = pipe(
    conditions=[condition1, condition2],
    prompt=prompt,
    negative_prompt=negative_prompt,
    width=768,
    height=512,
    num_frames=161,
    num_inference_steps=40,
    generator=generator,
)
# Keep the first entry of the returned `frames`.
video = result.frames[0]

# Export the video to "output.mp4" at 24 fps.
export_to_video(video, "output.mp4", fps=24)