Spaces:
Runtime error
Runtime error
File size: 5,851 Bytes
cc50ae5 554d8e7 cc50ae5 554d8e7 1cebbb0 cc50ae5 554d8e7 cc50ae5 554d8e7 cc50ae5 554d8e7 cc50ae5 554d8e7 cc50ae5 554d8e7 cc50ae5 554d8e7 cc50ae5 554d8e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import gradio as gr
import torch
import os
import spaces
import uuid
from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
from diffusers.utils import export_to_video
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
from PIL import Image
# ---------------------------------------------------------------------------
# Constants and shared pipeline state
# ---------------------------------------------------------------------------

# Label shown in the "Base model" dropdown -> Hub repository id of that model.
bases = {
    "Cartoon": "frankjoshua/toonyou_beta6",
    "Realistic": "emilianJR/epiCRealism",
    "3d": "Lykon/DreamShaper",
    "Anime": "Yntec/mistoonAnime2",
}

# What is currently loaded into `pipe`. generate_image() compares each request
# against these values and only reloads the pieces that changed.
step_loaded = motion_loaded = None
base_loaded = "Realistic"

# The pipeline is built right here at import time and is moved to CUDA, so
# fail fast when no GPU is visible.
if not torch.cuda.is_available():
    raise NotImplementedError("No GPU detected!")

device = "cuda"
dtype = torch.float16

# Start from the default base model in half precision.
pipe = AnimateDiffPipeline.from_pretrained(
    bases[base_loaded],
    torch_dtype=dtype,
).to(device)

# Replace the scheduler, reusing the existing scheduler's config with
# trailing timestep spacing and a linear beta schedule.
pipe.scheduler = EulerDiscreteScheduler.from_config(
    pipe.scheduler.config,
    timestep_spacing="trailing",
    beta_schedule="linear",
)

# Safety checkers
# NOTE(review): `feature_extractor` is not referenced anywhere else in the
# visible code -- confirm whether it is still needed.
from transformers import CLIPFeatureExtractor

feature_extractor = CLIPFeatureExtractor.from_pretrained(
    "openai/clip-vit-base-patch32"
)  # change for open-source model
# Function: we are using Gradio server to queue calls. However this is open for different architectures
@spaces.GPU(duration=15,enable_queue=True)
def generate_image(prompt, base, motion, step, progress=gr.Progress()):
    """Generate a short video for *prompt* and return the path of the MP4 file.

    The module-level ``pipe`` is shared between calls and is reconfigured
    lazily: the step checkpoint, the base-model UNet weights and the motion
    LoRA are only (re)loaded when the requested value differs from what the
    ``step_loaded`` / ``base_loaded`` / ``motion_loaded`` globals record.

    Args:
        prompt: Text prompt passed to the pipeline.
        base: Key of the module-level ``bases`` mapping selecting the base
            model.
        motion: Hub repository id of a motion LoRA, or an empty string /
            ``None`` for no motion LoRA.
        step: Number of inference steps. It is also interpolated into the
            AnimateDiff-Lightning checkpoint file name, so a matching
            ``animatediff_lightning_{step}step_diffusers.safetensors`` file
            must exist in the checkpoint repository.
        progress: Gradio progress tracker, updated once per inference step.

    Returns:
        Path of the generated ``.mp4`` file under ``/tmp``.

    Raises:
        gr.Error: If ``base`` is not a key of ``bases``.
    """
    global step_loaded, base_loaded, motion_loaded

    print(prompt, base, step)

    # Surface a readable error in the UI instead of a bare KeyError when a
    # caller (e.g. through the API) sends a base model that is not configured.
    if base not in bases:
        raise gr.Error(f"Unknown base model: {base!r}")

    if step_loaded != step:
        repo = "ByteDance/AnimateDiff-Lightning"  # we can change to other Diffusion models...
        # ...but you must change the implementation at this point to match with the checkpoint
        ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
        lightning_weights = load_file(hf_hub_download(repo, ckpt), device=device)
        # strict=False: the checkpoint is not required to cover every UNet key.
        pipe.unet.load_state_dict(lightning_weights, strict=False)
        step_loaded = step

    if base_loaded != base:
        unet_file = hf_hub_download(bases[base], "unet/diffusion_pytorch_model.bin")
        # SECURITY: a ``.bin`` checkpoint is a pickle downloaded from the Hub.
        # weights_only=True restricts unpickling to tensors and plain
        # containers so a tampered file cannot execute arbitrary code.
        base_weights = torch.load(unet_file, map_location=device, weights_only=True)
        pipe.unet.load_state_dict(base_weights, strict=False)
        base_loaded = base

    if motion_loaded != motion:
        # Always drop the previous LoRA first, then load the new one if any.
        pipe.unload_lora_weights()
        if motion:  # "" (the "Default" choice) or None means no motion LoRA
            pipe.load_lora_weights(motion, adapter_name="motion")
            pipe.set_adapters(["motion"], [0.7])
        motion_loaded = motion

    # Providing visibility to progress. Useful if using gradio interface.
    progress((0, step))

    def progress_callback(i, t, z):
        # Only the first argument is used; it is reported as a 1-based count.
        progress((i + 1, step))

    output = pipe(
        prompt=prompt,
        guidance_scale=1.2,
        num_inference_steps=step,
        callback=progress_callback,
        callback_steps=1,
    )

    # uuid4().hex is the 32-character form without dashes.
    name = uuid.uuid4().hex
    path = f"/tmp/{name}.mp4"
    export_to_video(output.frames[0], path, fps=10)
    return path
# Gradio Interface
# NOTE(review): the nesting of the layout containers below was reconstructed
# from source that had lost its indentation -- confirm it matches the
# intended layout.
with gr.Blocks(css="style.css", theme='sudeepshouche/minimalist') as syntvideo:
    # Page header with usage hints and example prompts.
    gr.HTML(
        "<h1><center>MAGIC Demo: synthetic video generation application</center></h1>" +
        # The <span> is now closed explicitly so the markup is well-formed.
        "<p><center><span style='color: red;'>Change the steps from 4 to 8 to get better results.</span></center></p>" +
        "<p><center>Write prompts in style as given in the examples below:</center></p>" +
        "<p><center>Focus: Group of Birds in sky (Animate: Birds Moving) (Shot From distance)</center></p>" +
        "<p><center>Focus: Trees In forest (Animate: Lion running)</center></p>" +
        "<p><center>Focus: Kids Playing (Season: Winter)</center></p>" +
        "<p><center>Focus: Cars in Street (Season: Rain, Daytime) (Shot from Distance) (Movement: Cars running)</center></p>"
    )
    with gr.Group():
        with gr.Row():
            prompt = gr.Textbox(
                label='Prompt'
            )
        with gr.Row():
            # Choices are the keys of the module-level `bases` mapping.
            select_base = gr.Dropdown(
                label='Base model',
                choices=[
                    "Cartoon",
                    "Realistic",
                    "3d",
                    "Anime",
                ],
                value=base_loaded,
                interactive=True
            )
            # (label, value) pairs; the value is passed to generate_image()
            # as `motion`, and "" selects no motion LoRA.
            select_motion = gr.Dropdown(
                label='Motion',
                choices=[
                    ("Default", ""),
                    ("Zoom in", "guoyww/animatediff-motion-lora-zoom-in"),
                    ("Zoom out", "guoyww/animatediff-motion-lora-zoom-out"),
                    ("Tilt up", "guoyww/animatediff-motion-lora-tilt-up"),
                    ("Tilt down", "guoyww/animatediff-motion-lora-tilt-down"),
                    ("Pan left", "guoyww/animatediff-motion-lora-pan-left"),
                    ("Pan right", "guoyww/animatediff-motion-lora-pan-right"),
                    ("Roll left", "guoyww/animatediff-motion-lora-rolling-anticlockwise"),
                    ("Roll right", "guoyww/animatediff-motion-lora-rolling-clockwise"),
                ],
                value="guoyww/animatediff-motion-lora-zoom-in",
                interactive=True
            )
            # The value is passed to generate_image() as `step`.
            select_step = gr.Dropdown(
                label='Inference steps',
                choices=[
                    ('1-Step', 1),
                    ('2-Step', 2),
                    ('4-Step', 4),
                    ('8-Step', 8),
                ],
                value=4,
                interactive=True
            )
            submit = gr.Button(
                scale=1,
                variant='primary'
            )
    video = gr.Video(
        label='Generate Synthetic Video',
        autoplay=True,
        height=512,
        width=512,
        elem_id="video_output"
    )

    # Pressing Enter in the prompt box and clicking the button both run the
    # same generation with the same inputs and output.
    for trigger in (prompt.submit, submit.click):
        trigger(
            fn=generate_image,
            inputs=[prompt, select_base, select_motion, select_step],
            outputs=video,
        )

syntvideo.queue().launch()