stable-video-diffusion

Sleeping

App Files Files Community

stable-video-diffusion / app.py

awacke1

Create app.py

ada2691 verified 2 months ago

raw

history blame

3.69 kB

	import gradio as gr
	import torch
	import os
	import uuid
	import random
	from glob import glob
	from pathlib import Path
	from typing import Optional
	from diffusers import StableVideoDiffusionPipeline
	from diffusers.utils import load_image, export_to_video
	from PIL import Image
	from huggingface_hub import hf_hub_download


	# Module-level setup: everything below runs once, when this file is imported.
	# Load the pretrained Stable Video Diffusion img2vid-xt pipeline, requesting
	# float16 weights (the "fp16" variant).
	pipe = StableVideoDiffusionPipeline.from_pretrained(
	"stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
	)
	# Move the pipeline to the CUDA device.
	pipe.to("cuda")
	# Replace the UNet with its torch.compile-wrapped version; this must happen
	# after the pipeline has been moved to the GPU above.
	pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
	# Largest signed 64-bit integer; used as the upper bound for random seeds in
	# sample() and as the maximum of the seed slider in the UI.
	max_64_bit_int = 2**63 - 1

	# Function to sample video from the input image
	def sample(
	    image: Image.Image,
	    seed: Optional[int] = 42,
	    randomize_seed: bool = True,
	    motion_bucket_id: int = 127,
	    fps_id: int = 6,
	    version: str = "svd_xt",
	    cond_aug: float = 0.02,
	    decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
	    device: str = "cuda",
	    output_folder: str = "outputs",
	):
	    """Generate a 25-frame video from a single image and save it as an MP4.

	    Args:
	        image: PIL image to animate. RGBA input is converted to RGB first.
	        seed: Seed for the torch generator. Ignored when ``randomize_seed``
	            is true; when it is ``None`` a random seed is drawn as well.
	        randomize_seed: Draw a fresh random seed in ``[0, max_64_bit_int]``
	            instead of using ``seed``.
	        motion_bucket_id: Forwarded to the pipeline as ``motion_bucket_id``.
	        fps_id: Frame rate passed to ``export_to_video``.
	        version: Not read by this function; kept for interface compatibility.
	        cond_aug: Not read by this function; the pipeline is always called
	            with ``noise_aug_strength=0.1``.
	        decoding_t: Forwarded to the pipeline as ``decode_chunk_size``.
	        device: Not read by this function; kept for interface compatibility.
	        output_folder: Directory the MP4 is written to (created if missing).

	    Returns:
	        A ``(video_path, seed)`` tuple: the path of the written video and the
	        seed that was actually used.

	    Raises:
	        gr.Error: If no image was supplied.
	    """
	    # Clicking "Generate" without an upload delivers None; report that to the
	    # user instead of failing with an AttributeError on ``image.mode``.
	    if image is None:
	        raise gr.Error("Please upload an image before generating a video.")

	    if image.mode == "RGBA":
	        image = image.convert("RGB")

	    # ``seed`` is declared Optional, so fall back to a random seed rather
	    # than passing None to torch.manual_seed.
	    if randomize_seed or seed is None:
	        seed = random.randint(0, max_64_bit_int)

	    generator = torch.manual_seed(seed)

	    os.makedirs(output_folder, exist_ok=True)
	    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
	    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
	    # The file count is only a starting point: if the numbering has gaps
	    # (e.g. an earlier video was deleted) that name may already be taken, so
	    # advance to the first free one instead of overwriting an existing video.
	    while os.path.exists(video_path):
	        base_count += 1
	        video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")

	    frames = pipe(
	        image,
	        decode_chunk_size=decoding_t,
	        generator=generator,
	        motion_bucket_id=motion_bucket_id,
	        noise_aug_strength=0.1,
	        num_frames=25,
	    ).frames[0]
	    export_to_video(frames, video_path, fps=fps_id)
	    # Re-seed torch's global RNG with the same seed after generation
	    # (behaviour kept from the original implementation).
	    torch.manual_seed(seed)
	    return video_path, seed

	# Function to resize the uploaded image
	def resize_image(image, output_size=(1024, 576)):
	    """Scale *image* to cover *output_size*, then crop the centred excess.

	    The image is resized (LANCZOS) so that one dimension equals the target
	    and the other is at least as large, keeping the source aspect ratio.
	    The overflow along the larger dimension is then cropped equally from
	    both sides.

	    Args:
	        image: Source image exposing ``width``, ``height``, ``resize`` and
	            ``crop`` (a PIL image in this app).
	        output_size: ``(width, height)`` of the result.

	    Returns:
	        The resized and centre-cropped image.
	    """
	    target_w, target_h = output_size[0], output_size[1]
	    target_ratio = target_w / target_h
	    source_ratio = image.width / image.height

	    if source_ratio > target_ratio:
	        # Source is relatively wider: match the height, trim left and right.
	        scaled_h = target_h
	        scaled_w = int(scaled_h * source_ratio)
	        crop_box = (
	            (scaled_w - target_w) / 2,
	            0,
	            (scaled_w + target_w) / 2,
	            target_h,
	        )
	    else:
	        # Source is relatively taller (or equal): match the width, trim top
	        # and bottom.
	        scaled_w = target_w
	        scaled_h = int(scaled_w / source_ratio)
	        crop_box = (
	            0,
	            (scaled_h - target_h) / 2,
	            target_w,
	            (scaled_h + target_h) / 2,
	        )

	    scaled = image.resize((scaled_w, scaled_h), Image.LANCZOS)
	    return scaled.crop(crop_box)

	# Dynamically load image files from the 'images' directory
	def get_example_images(image_dir="images/"):
	    """Return the paths of the PNG and JPG files directly inside *image_dir*.

	    Args:
	        image_dir: Directory to search. Defaults to ``"images/"``, relative
	            to the current working directory.

	    Returns:
	        A list of paths: all ``*.png`` matches followed by all ``*.jpg``
	        matches, each group sorted by path. The list is empty when the
	        directory does not exist or holds no matching files.
	    """
	    # The patterns need the ``*`` wildcard; a bare ".png" only matches a file
	    # that is literally named ".png". Sorting makes the order deterministic,
	    # since glob() itself returns matches in arbitrary order.
	    png_files = sorted(glob(os.path.join(image_dir, "*.png")))
	    jpg_files = sorted(glob(os.path.join(image_dir, "*.jpg")))
	    return png_files + jpg_files

	# Gradio interface setup
	with gr.Blocks() as demo:
	gr.Markdown('''# Stable Video Diffusion using Image 2 Video XT
	#### Research release: generate `4s` vid from a single image at (`25 frames` at `6 fps`).''')

	with gr.Row():
	with gr.Column():
	image = gr.Image(label="Upload your image", type="pil")
	generate_btn = gr.Button("Generate")
	video = gr.Video()

	with gr.Accordion("Advanced options", open=False):
	seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
	randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
	motion_bucket_id = gr.Slider(label="Motion bucket id", value=127, minimum=1, maximum=255)
	fps_id = gr.Slider(label="Frames per second", value=6, minimum=5, maximum=30)

	image.upload(fn