import base64
import os
import uuid

import gradio as gr
import torch
from diffusers import AnimateDiffPipeline, EulerDiscreteScheduler
from diffusers.utils import export_to_video
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

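# Simple shared-secret gate: requests must supply this token, so only the
# AiTube backend can call this Space.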
SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
# Constants
bases = {
"ToonYou": "frankjoshua/toonyou_beta6",
"epiCRealism": "emilianJR/epiCRealism"
}
step_loaded = None
base_loaded = "epiCRealism"
motion_loaded = None
# Ensure a GPU is available; the model and scheduler are initialized once at startup
if not torch.cuda.is_available():
    raise NotImplementedError("No GPU detected!")
device = "cuda"
dtype = torch.float16
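# Load the default base model once; generate_image() swaps UNet weights,
# Lightning checkpoints and motion LoRAs in place on demand.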
pipe = AnimateDiffPipeline.from_pretrained(bases[base_loaded], torch_dtype=dtype).to(device)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
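
# Token-gated endpoint: generates a short clip with AnimateDiff-Lightning and
# returns it to the caller as a base64-encoded MP4 data URI.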
def generate_image(secret_token, prompt, base, motion, step, progress=gr.Progress()):
    if secret_token != SECRET_TOKEN:
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')
    global step_loaded
    global base_loaded
    global motion_loaded
    # print(prompt, base, step)

    if step_loaded != step:
        repo = "ByteDance/AnimateDiff-Lightning"
        ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
        pipe.unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device), strict=False)
        step_loaded = step
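
    # Swap in the full UNet weights of the requested base model.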
    if base_loaded != base:
        pipe.unet.load_state_dict(torch.load(hf_hub_download(bases[base], "unet/diffusion_pytorch_model.bin"), map_location=device), strict=False)
        base_loaded = base
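
    # (Re)load the optional camera-motion LoRA; an empty string means no motion LoRA.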
    if motion_loaded != motion:
        pipe.unload_lora_weights()
        if motion != "":
            pipe.load_lora_weights(motion, adapter_name="motion")
            pipe.set_adapters(["motion"], [0.7])
        motion_loaded = motion

    progress((0, step))

    def progress_callback(i, t, z):
        progress((i + 1, step))
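
    # Note: `callback`/`callback_steps` below are the legacy diffusers callback
    # arguments; newer diffusers releases deprecate them in favor of
    # `callback_on_step_end`.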
    output = pipe(
        prompt=prompt,
        # this corresponds roughly to 16:9,
        # the aspect ratio of the videos used by AiTube
        width=912,   # 1024
        height=512,  # 576
        guidance_scale=1.0,
        num_inference_steps=step,
        callback=progress_callback,
        callback_steps=1
    )
    name = str(uuid.uuid4()).replace("-", "")
    path = f"/tmp/{name}.mp4"

    # We are probably losing time here too: encoding to MP4 is slow, and we
    # should return the frames unencoded to the frontend renderer instead.
    export_to_video(output.frames[0], path, fps=10)
    # Read the content of the video file and encode it to base64
    with open(path, "rb") as video_file:
        video_base64 = base64.b64encode(video_file.read()).decode('utf-8')

    # Prepend the appropriate data URI header with the MIME type
    video_data_uri = 'data:video/mp4;base64,' + video_base64

    # Clean up, otherwise there is a risk of "ghosting" (e.g. someone seeing a
    # previously generated video if one of the steps goes wrong)
    os.remove(path)

    return video_data_uri

# Gradio Interface
with gr.Blocks() as demo:
gr.HTML("""
<div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
<div style="text-align: center; color: black;">
<p style="color: black;">This space is a REST API to programmatically generate MP4 videos for AiTube, the next generation video platform.</p>
<p style="color: black;">Interested in using it? Look no further than the <a href="https://huggingface.co/spaces/ByteDance/AnimateDiff-Lightning" target="_blank">original space</a>!</p>
</div>
</div>""")
secret_token = gr.Text(label='Secret Token', max_lines=1)
with gr.Group():
with gr.Row():
prompt = gr.Textbox(
label='Prompt'
)
with gr.Row():
select_base = gr.Dropdown(
label='Base model',
choices=[
"ToonYou",
"epiCRealism",
],
value=base_loaded,
interactive=True
)
select_motion = gr.Dropdown(
label='Motion',
choices=[
("Default", ""),
("Zoom in", "guoyww/animatediff-motion-lora-zoom-in"),
("Zoom out", "guoyww/animatediff-motion-lora-zoom-out"),
("Tilt up", "guoyww/animatediff-motion-lora-tilt-up"),
("Tilt down", "guoyww/animatediff-motion-lora-tilt-down"),
("Pan left", "guoyww/animatediff-motion-lora-pan-left"),
("Pan right", "guoyww/animatediff-motion-lora-pan-right"),
("Roll left", "guoyww/animatediff-motion-lora-rolling-anticlockwise"),
("Roll right", "guoyww/animatediff-motion-lora-rolling-clockwise"),
],
value="",
interactive=True
)
select_step = gr.Dropdown(
label='Inference steps',
choices=[
('1-Step', 1),
('2-Step', 2),
('4-Step', 4),
('8-Step', 8)],
value=4,
interactive=True
)
submit = gr.Button()
output_video_base64 = gr.Text()
submit.click(
fn=generate_image,
inputs=[secret_token, prompt, select_base, select_motion, select_step],
outputs=output_video_base64,
)
demo.queue(max_size=12).launch(show_api=True)
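
# A minimal client sketch (hypothetical: the space id "user/space-name" and the
# token below are placeholders, and the endpoint name may differ depending on
# the Gradio version -- check the "Use via API" panel of the deployed space):
#
#   from gradio_client import Client
#
#   client = Client("user/space-name")
#   data_uri = client.predict(
#       "my-secret-token",         # secret_token
#       "a cat walking on grass",  # prompt
#       "epiCRealism",             # base model
#       "",                        # motion LoRA ("" = none)
#       4,                         # inference steps
#       api_name="/generate_image",
#   )
#   # data_uri is a string of the form "data:video/mp4;base64,..."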