import base64
import os
import uuid

import gradio as gr
import torch
from diffusers import AnimateDiffPipeline, EulerDiscreteScheduler
from diffusers.utils import export_to_video
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

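# Simple shared-secret gate: requests must supply this token, so only the
# AiTube backend can call this Space.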
SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
# Constants
bases = {
"ToonYou": "frankjoshua/toonyou_beta6",
"epiCRealism": "emilianJR/epiCRealism"
}
step_loaded = None
base_loaded = "epiCRealism"
motion_loaded = None
# Ensure a GPU is available; the model and scheduler are initialized once at startup
if not torch.cuda.is_available():
    raise NotImplementedError("No GPU detected!")
device = "cuda"
dtype = torch.float16
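# Load the default base model once; generate_image() swaps UNet weights,
# Lightning checkpoints and motion LoRAs in place on demand.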
pipe = AnimateDiffPipeline.from_pretrained(bases[base_loaded], torch_dtype=dtype).to(device)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
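
# Token-gated endpoint: generates a short clip with AnimateDiff-Lightning and
# returns it to the caller as a base64-encoded MP4 data URI.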
def generate_image(secret_token, prompt, base, motion, step, progress=gr.Progress()):
    if secret_token != SECRET_TOKEN:
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')
    global step_loaded
    global base_loaded
    global motion_loaded
    # print(prompt, base, step)

    if step_loaded != step:
        repo = "ByteDance/AnimateDiff-Lightning"
        ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
        pipe.unet.load_state_dict(load_file(hf_hub_download(repo, ckpt), device=device), strict=False)
        step_loaded = step
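
    # Swap in the full UNet weights of the requested base model.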
    if base_loaded != base:
        pipe.unet.load_state_dict(torch.load(hf_hub_download(bases[base], "unet/diffusion_pytorch_model.bin"), map_location=device), strict=False)
        base_loaded = base
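
    # (Re)load the optional camera-motion LoRA; an empty string means no motion LoRA.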
    if motion_loaded != motion:
        pipe.unload_lora_weights()
        if motion != "":
            pipe.load_lora_weights(motion, adapter_name="motion")
            pipe.set_adapters(["motion"], [0.7])
        motion_loaded = motion

    progress((0, step))

    def progress_callback(i, t, z):
        progress((i + 1, step))
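
    # Note: `callback`/`callback_steps` below are the legacy diffusers callback
    # arguments; newer diffusers releases deprecate them in favor of
    # `callback_on_step_end`.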
    output = pipe(
        prompt=prompt,
        # this corresponds roughly to 16:9,
        # the aspect ratio of the videos used by AiTube
        width=912,   # 1024
        height=512,  # 576
        guidance_scale=1.0,
        num_inference_steps=step,
        callback=progress_callback,
        callback_steps=1
    )
    name = str(uuid.uuid4()).replace("-", "")
    path = f"/tmp/{name}.mp4"

    # We are probably losing time here too: encoding to MP4 is slow, and we
    # should return the frames unencoded to the frontend renderer instead.
    export_to_video(output.frames[0], path, fps=10)
    # Read the content of the video file and encode it to base64
    with open(path, "rb") as video_file:
        video_base64 = base64.b64encode(video_file.read()).decode('utf-8')

    # Prepend the appropriate data URI header with the MIME type
    video_data_uri = 'data:video/mp4;base64,' + video_base64

    # Clean up, otherwise there is a risk of "ghosting" (e.g. someone seeing a
    # previously generated video if one of the steps goes wrong)
    os.remove(path)

    return video_data_uri

# Gradio Interface
with gr.Blocks() as demo:
gr.HTML("""
<div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
<div style="text-align: center; color: black;">
<p style="color: black;">This space is a REST API to programmatically generate MP4 videos for AiTube, the next generation video platform.</p>
<p style="color: black;">Interested in using it? Look no further than the <a href="https://huggingface.co/spaces/ByteDance/AnimateDiff-Lightning" target="_blank">original space</a>!</p>
</div>
</div>""")
secret_token = gr.Text(label='Secret Token', max_lines=1)
with gr.Group():
with gr.Row():
prompt = gr.Textbox(
label='Prompt'
)
with gr.Row():
select_base = gr.Dropdown(
label='Base model',
choices=[
"ToonYou",
"epiCRealism",
],
value=base_loaded,
interactive=True
)
select_motion = gr.Dropdown(
label='Motion',
choices=[
("Default", ""),
("Zoom in", "guoyww/animatediff-motion-lora-zoom-in"),
("Zoom out", "guoyww/animatediff-motion-lora-zoom-out"),
("Tilt up", "guoyww/animatediff-motion-lora-tilt-up"),
("Tilt down", "guoyww/animatediff-motion-lora-tilt-down"),
("Pan left", "guoyww/animatediff-motion-lora-pan-left"),
("Pan right", "guoyww/animatediff-motion-lora-pan-right"),
("Roll left", "guoyww/animatediff-motion-lora-rolling-anticlockwise"),
("Roll right", "guoyww/animatediff-motion-lora-rolling-clockwise"),
],
value="",
interactive=True
)
select_step = gr.Dropdown(
label='Inference steps',
choices=[
('1-Step', 1),
('2-Step', 2),
('4-Step', 4),
('8-Step', 8)],
value=4,
interactive=True
)
submit = gr.Button()
output_video_base64 = gr.Text()
submit.click(
fn=generate_image,
inputs=[secret_token, prompt, select_base, select_motion, select_step],
outputs=output_video_base64,
)
demo.queue(max_size=12).launch(show_api=True)
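
# A minimal client sketch (hypothetical: the space id "user/space-name" and the
# token below are placeholders, and the endpoint name may differ depending on
# the Gradio version -- check the "Use via API" panel of the deployed space):
#
#   from gradio_client import Client
#
#   client = Client("user/space-name")
#   data_uri = client.predict(
#       "my-secret-token",         # secret_token
#       "a cat walking on grass",  # prompt
#       "epiCRealism",             # base model
#       "",                        # motion LoRA ("" = none)
#       4,                         # inference steps
#       api_name="/generate_image",
#   )
#   # data_uri is a string of the form "data:video/mp4;base64,..."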