# Retrieved from a Hugging Face Space file view (uploader: kadirnar).
# Commit debc5f3 ("update"), file size 7.99 kB.
from inpaint_zoom.utils.zoom_in_utils import image_grid, shrink_and_paste_on_blank, dummy, write_video
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from PIL import Image
import gradio as gr
import numpy as np
import torch
import os
# Expose only GPU 0 to CUDA for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Inpainting checkpoints offered in the model dropdown; the first is the default.
stable_paint_model_list = [
    "stabilityai/stable-diffusion-2-inpainting",
    "runwayml/stable-diffusion-inpainting",
]

# Example prompts; the first entry pre-fills the prompt textbox.
stable_paint_prompt_list = [
    "children running in the forest , sunny, bright, by studio ghibli painting, superior quality, masterpiece, traditional Japanese colors, by Grzegorz Rutkowski, concept art",
    "A beautiful landscape of a mountain range with a lake in the foreground",
]

# Example negative prompts; the first entry pre-fills the negative-prompt textbox.
# The leading term previously read "lurry" (typo for "blurry").
stable_paint_negative_prompt_list = [
    "blurry, bad art, blurred, text, watermark",
]
class StableDiffusionZoomIn:
    """Render a zoom video by repeatedly inpainting around a shrunken frame.

    The diffusion pipeline is loaded lazily and cached on the instance. It is
    reloaded only when a different ``model_id`` is requested.
    """

    def __init__(self):
        # All three are populated by load_model().
        self.pipe = None
        self.model_id = None
        self.g_cuda = None

    def load_model(self, model_id):
        """Return the inpainting pipeline for ``model_id``, loading it if needed.

        A cached pipeline is reused only when it was loaded from the same
        ``model_id``; otherwise the old one is released and the requested
        checkpoint is loaded onto the CUDA device in float16.

        Args:
            model_id: Checkpoint identifier passed to
                ``DiffusionPipeline.from_pretrained``.

        Returns:
            The configured pipeline object.
        """
        if self.pipe is not None and self.model_id == model_id:
            return self.pipe

        if self.pipe is not None:
            # Drop the previous model before loading the next one so both do
            # not need to fit in GPU memory at the same time.
            self.pipe = None
            torch.cuda.empty_cache()

        # NOTE(review): newer diffusers releases select half-precision weights
        # with variant="fp16" instead of revision="fp16" - confirm against the
        # pinned diffusers version before changing.
        pipe = DiffusionPipeline.from_pretrained(
            model_id, torch_dtype=torch.float16, revision="fp16"
        )
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
        pipe = pipe.to("cuda")
        # The built-in safety checker is replaced by the project's `dummy` callable.
        pipe.safety_checker = dummy
        pipe.enable_attention_slicing()
        pipe.enable_xformers_memory_efficient_attention()

        self.pipe = pipe
        self.model_id = model_id
        self.g_cuda = torch.Generator(device="cuda")
        return self.pipe

    @staticmethod
    def _alpha_to_mask(rgba_image):
        """Return an RGB mask that is white where ``rgba_image`` is transparent."""
        alpha = np.array(rgba_image)[:, :, 3]
        return Image.fromarray(255 - alpha).convert("RGB")

    @staticmethod
    def _interpolation_margin(step, num_interpol_frames, mask_width, size):
        """Return the crop margin (pixels per side) for interpolation frame ``step``.

        The margin shrinks geometrically from just under ``mask_width`` at
        ``step == 0`` towards zero as ``step`` approaches ``num_interpol_frames``.
        """
        shrink_ratio = 1 - 2 * mask_width / size
        progress = 1 - (step + 1) / num_interpol_frames
        return round((1 - shrink_ratio ** progress) * size / 2)

    @staticmethod
    def _paste_margin(interpol_width, mask_width, size):
        """Return the border width at which the previous frame is re-pasted.

        ``interpol_width`` is the crop margin of the current interpolation
        frame; the result is the matching margin for the previous frame after
        the crop has been scaled back up to ``size``.
        """
        return round(
            (1 - (size - 2 * mask_width) / (size - 2 * interpol_width)) / 2 * size
        )

    def generate_video(
        self,
        model_id,
        prompt,
        negative_prompt,
        guidance_scale,
        num_inference_steps,
        *,
        seed=9999,
        num_outpainting_steps=20,
        mask_width=128,
        num_interpol_frames=30,
        fps=30,
    ):
        """Generate the zoom video and return the path of the written file.

        Args:
            model_id: Checkpoint identifier, forwarded to ``load_model``.
            prompt: Text prompt used for every diffusion call.
            negative_prompt: Negative text prompt used for every diffusion call.
            guidance_scale: Guidance scale forwarded to the pipeline.
            num_inference_steps: Denoising steps per diffusion call.
            seed: Seed for the initial image only; later steps are unseeded.
            num_outpainting_steps: Number of shrink-and-inpaint iterations.
            mask_width: Border width (pixels) that is inpainted per iteration.
            num_interpol_frames: Frames emitted per iteration, including the
                fully inpainted frame that ends it.
            fps: Frame rate passed to ``write_video``.

        Returns:
            ``"infinite_zoom_out.mp4"``, the path the video was written to.

        Raises:
            ValueError: If ``mask_width`` does not leave a positive inner area
                in the 512-pixel frame.
        """
        height = 512
        width = height
        if not 0 < 2 * mask_width < height:
            raise ValueError(
                f"mask_width must be between 1 and {height // 2 - 1}, got {mask_width}"
            )

        pipe = self.load_model(model_id)
        # UI sliders may deliver whole numbers as floats; the step count must be an int.
        num_inference_steps = int(num_inference_steps)

        num_init_images = 2
        init_image_selected = 1  # 1-based index of the candidate to start from

        # A new RGBA image is fully transparent, so the derived mask is all
        # white and the whole first frame is generated from the prompt.
        blank = Image.new(mode="RGBA", size=(width, height))
        init_images = pipe(
            prompt=[prompt] * num_init_images,
            negative_prompt=[negative_prompt] * num_init_images,
            image=blank.convert("RGB"),
            guidance_scale=guidance_scale,
            height=height,
            width=width,
            generator=self.g_cuda.manual_seed(seed),
            mask_image=self._alpha_to_mask(blank),
            num_inference_steps=num_inference_steps,
        )[0]
        # NOTE(review): the return value is unused here; the call is kept in
        # case the helper has side effects - confirm and remove if it is pure.
        image_grid(init_images, rows=1, cols=num_init_images)

        start_index = 0 if num_init_images == 1 else init_image_selected - 1
        current_image = init_images[start_index]
        all_frames = [current_image]

        for i in range(num_outpainting_steps):
            print(f"Generating image: {i + 1} / {num_outpainting_steps}")
            prev_image_fix = current_image
            # Shrunken previous frame; its alpha channel marks the border
            # region (mask_width wide) that still has to be painted.
            prev_image = shrink_and_paste_on_blank(current_image, mask_width)

            # No generator is passed on purpose: seeding here would make the
            # whole video deterministic but the output less varied.
            images = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                image=prev_image.convert("RGB"),
                guidance_scale=guidance_scale,
                height=height,
                width=width,
                mask_image=self._alpha_to_mask(prev_image),
                num_inference_steps=num_inference_steps,
            )[0]
            current_image = images[0]
            # Restore the untouched centre over the pipeline output.
            current_image.paste(prev_image, mask=prev_image)

            # Interpolation between two inpainted frames: sequential crop and zoom.
            for j in range(num_interpol_frames - 1):
                interpol_width = self._interpolation_margin(
                    j, num_interpol_frames, mask_width, height
                )
                interpol_image = current_image.crop(
                    (
                        interpol_width,
                        interpol_width,
                        width - interpol_width,
                        height - interpol_width,
                    )
                ).resize((width, height))

                # Paste the higher-resolution previous image in the middle to
                # avoid the quality drop caused by upscaling the crop.
                interpol_width2 = self._paste_margin(interpol_width, mask_width, height)
                prev_image_fix_crop = shrink_and_paste_on_blank(
                    prev_image_fix, interpol_width2
                )
                interpol_image.paste(prev_image_fix_crop, mask=prev_image_fix_crop)
                all_frames.append(interpol_image)

            all_frames.append(current_image)

        video_file_name = "infinite_zoom_out"
        save_path = video_file_name + ".mp4"
        write_video(save_path, all_frames, fps)
        return save_path

    @staticmethod
    def app():
        """Declare the Gradio controls and wire the button to ``generate_video``.

        Takes no arguments and returns nothing; the components are created
        inside a ``gr.Blocks`` context opened here.
        """
        with gr.Blocks():
            with gr.Row():
                with gr.Column():
                    text2image_in_model_path = gr.Dropdown(
                        choices=stable_paint_model_list,
                        value=stable_paint_model_list[0],
                        label='Text-Image Model Id',
                    )

                    text2image_in_prompt = gr.Textbox(
                        lines=1,
                        value=stable_paint_prompt_list[0],
                        label='Prompt',
                    )

                    text2image_in_negative_prompt = gr.Textbox(
                        lines=1,
                        value=stable_paint_negative_prompt_list[0],
                        label='Negative Prompt',
                    )

                    with gr.Row():
                        with gr.Column():
                            text2image_in_guidance_scale = gr.Slider(
                                minimum=0.1,
                                maximum=15,
                                step=0.1,
                                value=7.5,
                                label='Guidance Scale',
                            )

                            text2image_in_num_inference_step = gr.Slider(
                                minimum=1,
                                maximum=100,
                                step=1,
                                value=50,
                                label='Num Inference Step',
                            )

                    text2image_in_predict = gr.Button(value='Generator')

                with gr.Column():
                    output_image = gr.Video(label='Output')

            text2image_in_predict.click(
                fn=StableDiffusionZoomIn().generate_video,
                inputs=[
                    text2image_in_model_path,
                    text2image_in_prompt,
                    text2image_in_negative_prompt,
                    text2image_in_guidance_scale,
                    text2image_in_num_inference_step,
                ],
                outputs=output_image,
            )


# NOTE(review): `app` takes no `self`, so it may have been called either as
# `StableDiffusionZoomIn.app()` or as a module-level `app()`. This alias keeps
# both spellings working - drop it once the call site is confirmed.
app = StableDiffusionZoomIn.app