Animation_With_Sound

Runtime error

App Files Files Community

Animation_With_Sound / app.py

ysharma HF staff

Update app.py

d9b0c2c verified 10 months ago

raw

history blame

6.94 kB

	import gradio as gr
	import torch
	import os
	import spaces
	import uuid

	from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
	from diffusers.utils import export_to_video
	from huggingface_hub import hf_hub_download
	from safetensors.torch import load_file
	from PIL import Image
	from gradio_client import Client, file
	from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips


	# Remote Tango2 Space, reached through the Gradio Python client; tango2()
	# below is the only caller.
	client = Client("declare-lab/tango2")

	# Constants
	# Display name of each selectable base model -> Hub repo id.
	bases = {
	    "ToonYou": "frankjoshua/toonyou_beta6",
	    "epiCRealism": "emilianJR/epiCRealism"
	}
	# Bookkeeping for what is currently loaded into `pipe`. generate_image()
	# compares each request against these and only swaps weights on a change.
	step_loaded = None
	base_loaded = "epiCRealism"
	motion_loaded = None

	# Fail fast when no CUDA device is visible: the pipeline below is built at
	# import time and moved straight to the GPU.
	if not torch.cuda.is_available():
	    raise NotImplementedError("No GPU detected!")

	device = "cuda"
	dtype = torch.float16
	# Start from the default base model in half precision.
	pipe = AnimateDiffPipeline.from_pretrained(bases[base_loaded], torch_dtype=dtype).to(device)
	# Swap in an Euler discrete scheduler derived from the pipeline's existing
	# scheduler config, overriding timestep spacing and beta schedule.
	pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")

	# Safety checkers
	# NOTE(review): `safety_checker` is imported by bare module name, so it is
	# presumably a file shipped next to app.py rather than a package -- confirm.
	from safety_checker import StableDiffusionSafetyChecker
	from transformers import CLIPFeatureExtractor

	# The checker model lives on the GPU; the feature extractor only prepares
	# its CLIP input tensors (see check_nsfw_images()).
	safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker").to(device)
	feature_extractor = CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32")

	def check_nsfw_images(images: list[Image.Image]) -> list[bool]:
	    """Run the module-level safety checker over ``images``.

	    The images are first turned into CLIP input tensors by
	    ``feature_extractor`` and moved to ``device``; the checker then receives
	    both the raw images (wrapped in a list) and those pixel values.

	    Returns:
	        Whatever ``safety_checker`` returns; callers index it per image and
	        treat a truthy entry as "NSFW detected".
	    """
	    clip_features = feature_extractor(images, return_tensors="pt").to(device)
	    pixel_values = clip_features.pixel_values.to(device)
	    return safety_checker(images=[images], clip_input=pixel_values)

	# Function
	@spaces.GPU(enable_queue=True)
	def generate_image(prompt, base, motion, step, progress=gr.Progress()):
	    """Generate an animated clip for ``prompt`` and return it with sound.

	    Weights are swapped lazily: the Lightning step checkpoint, the base
	    UNet weights and the motion LoRA are only (re)loaded when the request
	    differs from what the module-level ``*_loaded`` globals say is active.

	    Args:
	        prompt: Text prompt, used for both the video and the audio request.
	        base: Key into ``bases`` selecting the base model.
	        motion: Motion LoRA repo id; an empty string or ``None`` means no LoRA.
	        step: Number of inference steps; also selects the Lightning
	            checkpoint file name.
	        progress: Gradio progress tracker, updated once per denoising step.

	    Returns:
	        Path of the MP4 produced by ``fuse_together``, or ``None`` when the
	        first generated frame is flagged by the safety checker.
	    """
	    global step_loaded
	    global base_loaded
	    global motion_loaded
	    print(prompt, base, step)

	    if step_loaded != step:
	        repo = "ByteDance/AnimateDiff-Lightning"
	        ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
	        # strict=False: the checkpoint is not expected to cover every UNet key.
	        pipe.unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device), strict=False)
	        step_loaded = step

	    if base_loaded != base:
	        pipe.unet.load_state_dict(torch.load(hf_hub_download(bases[base], "unet/diffusion_pytorch_model.bin"), map_location=device), strict=False)
	        base_loaded = base

	    if motion_loaded != motion:
	        pipe.unload_lora_weights()
	        # Truthiness check so that both "" and None mean "no motion LoRA"
	        # instead of passing None on to load_lora_weights().
	        if motion:
	            pipe.load_lora_weights(motion, adapter_name="motion")
	            pipe.set_adapters(["motion"], [0.7])
	        motion_loaded = motion

	    progress((0, step))

	    def progress_callback(i, t, z):
	        # Called by the pipeline after each step; only the step index is used.
	        progress((i + 1, step))

	    output = pipe(prompt=prompt, guidance_scale=1.0, num_inference_steps=step, callback=progress_callback, callback_steps=1)
	    frames = output.frames[0]

	    # Only the first frame of the clip is screened.
	    has_nsfw_concepts = check_nsfw_images([frames[0]])
	    if has_nsfw_concepts[0]:
	        gr.Warning("NSFW content detected.")
	        return None

	    name = uuid.uuid4().hex
	    video_path = f"/tmp/{name}.mp4"
	    export_to_video(frames, video_path, fps=10)

	    try:
	        audio_path = tango2(prompt)
	        return fuse_together(audio_path, video_path)
	    finally:
	        # The silent clip is only an intermediate; fuse_together() writes a
	        # separate output file. Remove it so /tmp does not grow by one file
	        # per request. Best effort: a failed cleanup must not mask the
	        # result or the original exception.
	        try:
	            os.remove(video_path)
	        except OSError:
	            pass


	def tango2(prompt):
	    """Request audio for ``prompt`` from the remote Tango2 Space.

	    Calls the ``/predict`` endpoint through the module-level Gradio
	    ``client`` with fixed ``steps=100`` and ``guidance=3``.

	    Returns:
	        The endpoint's result unchanged; the caller passes it on as the
	        audio file argument of ``fuse_together``.
	    """
	    request = {"prompt": prompt, "steps": 100, "guidance": 3}
	    return client.predict(api_name="/predict", **request)

	def fuse_together(audio, video):
	    """Loop ``video`` twice, put ``audio`` under it and write a new MP4.

	    Args:
	        audio: Path of the audio file to use as the soundtrack.
	        video: Path of the source video file.

	    Returns:
	        Path of the newly written file under ``/tmp``; it is twice as long
	        as the source video.
	    """
	    # Context managers close the underlying readers even when encoding
	    # fails, so a long-running server does not accumulate open handles.
	    with VideoFileClip(video) as video_clip, AudioFileClip(audio) as audio_clip:
	        # Loop the video twice
	        looped_video = concatenate_videoclips([video_clip, video_clip])

	        # Cut the audio to the looped video's length, but never ask for more
	        # audio than the file actually contains.
	        audio_end = min(looped_video.duration, audio_clip.duration)
	        looped_audio = audio_clip.subclip(0, audio_end)

	        # Set the audio of the looped video to the adjusted audio clip
	        final_video = looped_video.set_audio(looped_audio)

	        # Write the result to a uniquely named file
	        name = uuid.uuid4().hex
	        path = f"/tmp/{name}.mp4"
	        final_video.write_videofile(path, codec="libx264", audio_codec="aac")

	    return path


	# Gradio Interface
	# NOTE(review): the nesting of the `with` blocks below is reconstructed; the
	# copy this was reviewed from had lost its indentation. Confirm that the
	# Group/Row membership matches the intended layout.
	with gr.Blocks(css="style.css") as demo:
	    # Page header: title, one-line description and a link to the Gradio
	    # Python client guide.
	    gr.HTML(
	        "<h1><center>AnimateDiff-Lightning⚡ + TANGO 2</center></h1>" +
	        "<p><center>Using Gradio Python Client to combine <b>AnimateDiff Lightning</b> with <b>Tango2</b> to give Voice to your Generated Videos</center></p>" +
	        "<p><center>Refer Gradio Guide for Python Clients here :<a href='https://www.gradio.app/guides/getting-started-with-the-python-client'>Getting Started with the Gradio Python client</a></center></p>"
	    )
	    with gr.Group():
	        with gr.Row():
	            # Submitting this textbox also starts a generation (see
	            # prompt.submit below).
	            prompt = gr.Textbox(
	                label='Prompt (English)'
	            )
	        with gr.Row():
	            # Choices are the keys of `bases`; the default is the model
	            # already loaded at start-up.
	            select_base = gr.Dropdown(
	                label='Base model',
	                choices=[
	                    "ToonYou",
	                    "epiCRealism",
	                ],
	                value=base_loaded,
	                interactive=True
	            )
	            # (label, value) pairs: the value is the repo id that
	            # generate_image() hands to pipe.load_lora_weights(); the empty
	            # string means "no motion LoRA".
	            select_motion = gr.Dropdown(
	                label='Motion',
	                choices=[
	                    ("Default", ""),
	                    ("Zoom in", "guoyww/animatediff-motion-lora-zoom-in"),
	                    ("Zoom out", "guoyww/animatediff-motion-lora-zoom-out"),
	                    ("Tilt up", "guoyww/animatediff-motion-lora-tilt-up"),
	                    ("Tilt down", "guoyww/animatediff-motion-lora-tilt-down"),
	                    ("Pan left", "guoyww/animatediff-motion-lora-pan-left"),
	                    ("Pan right", "guoyww/animatediff-motion-lora-pan-right"),
	                    ("Roll left", "guoyww/animatediff-motion-lora-rolling-anticlockwise"),
	                    ("Roll right", "guoyww/animatediff-motion-lora-rolling-clockwise"),
	                ],
	                value="",
	                interactive=True
	            )
	            # The integer value is used both as num_inference_steps and to
	            # build the Lightning checkpoint file name in generate_image().
	            select_step = gr.Dropdown(
	                label='Inference steps',
	                choices=[
	                    ('1-Step', 1),
	                    ('2-Step', 2),
	                    ('4-Step', 4),
	                    ('8-Step', 8)],
	                value=4,
	                interactive=True
	            )
	            submit = gr.Button(
	                scale=1,
	                variant='primary'
	            )
	    # Output player for the path returned by generate_image().
	    video = gr.Video(
	        label='AnimateDiff-Lightning',
	        autoplay=True,
	        height=512,
	        width=512,
	        elem_id="video_output"
	    )

	    # Two triggers, same handler and same inputs: submitting the prompt
	    # textbox or clicking the button.
	    prompt.submit(
	        fn=generate_image,
	        inputs=[prompt, select_base, select_motion, select_step],
	        outputs=video,
	    )
	    submit.click(
	        fn=generate_image,
	        inputs=[prompt, select_base, select_motion, select_step],
	        outputs=video,
	    )

	# Enable request queuing, then start the app.
	demo.queue().launch()