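"""Gradio demo: text-to-video with AnimateDiff + SparseCtrl scribble conditioning.

Prompts may be written in Korean or English; Korean input is machine-translated
to English before being passed to the diffusion pipeline.
"""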
import ast

import torch
import gradio as gr
from diffusers import (
    AnimateDiffSparseControlNetPipeline,
    AutoencoderKL,
    DPMSolverMultistepScheduler,
    MotionAdapter,
    SparseControlNetModel,
)
from diffusers.utils import export_to_gif, load_image
from transformers import pipeline
from PIL import Image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
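# Ko->En translator: lets users type prompts in Korean (English text passes through unchanged).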
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
def translate_korean_to_english(text):
    # Translate only if the text contains Hangul jamo (U+3131-U+3163) or syllables (U+AC00-U+D7A3).
    if any('\u3131' <= char <= '\u3163' or '\uac00' <= char <= '\ud7a3' for char in text):
        return translator(text)[0]['translation_text']
    return text

def generate_video(prompt, negative_prompt, num_inference_steps, conditioning_frame_indices, controlnet_conditioning_scale, width, height, num_frames):
    prompt = translate_korean_to_english(prompt)
    negative_prompt = translate_korean_to_english(negative_prompt)

    # Note: all models are reloaded on every call; caching them at module level would be faster.
    motion_adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-3", torch_dtype=torch.float16).to(device)
    controlnet = SparseControlNetModel.from_pretrained("guoyww/animatediff-sparsectrl-scribble", torch_dtype=torch.float16).to(device)
    vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16).to(device)

    # SparseCtrl conditioning requires the dedicated sparse-ControlNet pipeline,
    # not the plain AnimateDiffPipeline (which does not accept a controlnet).
    pipe = AnimateDiffSparseControlNetPipeline.from_pretrained(
        "SG161222/Realistic_Vision_V6.0_B1_noVAE",
        motion_adapter=motion_adapter,
        controlnet=controlnet,
        vae=vae,
        torch_dtype=torch.float16,
    ).to(device)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config, beta_schedule="linear", algorithm_type="dpmsolver++", use_karras_sigmas=True
    )
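    # Scribble conditioning images taken from the diffusers documentation assets;
    # each image is paired with one of the user-supplied frame indices.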
    image_files = [
        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-1.png",
        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-2.png",
        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-3.png",
    ]
    conditioning_frames = [load_image(img_file) for img_file in image_files]

    # Parse the user-supplied index list safely (literal_eval instead of eval).
    conditioning_frame_indices = ast.literal_eval(conditioning_frame_indices)
    controlnet_conditioning_scale = float(controlnet_conditioning_scale)
    video = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        conditioning_frames=conditioning_frames,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        controlnet_frame_indices=conditioning_frame_indices,
        width=width,
        height=height,
        num_frames=num_frames,
        generator=torch.Generator().manual_seed(1337),
    ).frames[0]
    # Post-processing: blend neighboring frames to smooth transitions.
    interpolated_frames = []
    for i in range(len(video) - 1):
        interpolated_frames.append(video[i])
        interpolated_frames.append(Image.blend(video[i], video[i + 1], 0.5))
    interpolated_frames.append(video[-1])

    export_to_gif(interpolated_frames, "output.gif")
    return "output.gif"
demo = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Prompt (Korean or English)", value="๊ท€์—ฌ์šด ๊ฐ•์•„์ง€๊ฐ€ ์กฐ์šฉํžˆ ์ง–๊ณ  ์žˆ์Œ, ๊ฑธ์ž‘, ๊ณ ํ’ˆ์งˆ"),  # "a cute puppy quietly barking, masterpiece, high quality"
        gr.Textbox(label="Negative Prompt (Korean or English)", value="์ €ํ’ˆ์งˆ, ์ตœ์•…์˜ ํ’ˆ์งˆ, ๋ ˆํ„ฐ๋ฐ•์Šค"),  # "low quality, worst quality, letterboxed"
        gr.Slider(label="Number of Inference Steps", minimum=1, maximum=200, step=1, value=150),
        gr.Textbox(label="Conditioning Frame Indices", value="[0, 8, 15]"),
        gr.Slider(label="ControlNet Conditioning Scale", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
        gr.Slider(label="Width", minimum=256, maximum=1024, step=64, value=512),
        gr.Slider(label="Height", minimum=256, maximum=1024, step=64, value=512),
        gr.Slider(label="Number of Frames", minimum=16, maximum=128, step=16, value=64),
    ],
    outputs=gr.Image(label="Generated Video (GIF)"),
    title="High-Quality Video Generation with AnimateDiffSparseControlNetPipeline",
    description="Generates high-quality videos using AnimateDiffSparseControlNetPipeline. Prompts may be entered in Korean or English.",
)
demo.launch()