Spaces:

kadirnar
/

Video-Diffusion-WebUI

Runtime error

App Files Files Community

Video-Diffusion-WebUI / inpaint_zoom /app /zoom_in_app.py

kadirnar

update

debc5f3 about 1 year ago

raw history blame

No virus

7.99 kB

	from inpaint_zoom.utils.zoom_in_utils import image_grid, shrink_and_paste_on_blank, dummy, write_video
	from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
	from PIL import Image
	import gradio as gr
	import numpy as np
	import torch
	import os
	# Restrict CUDA to the first device. This only takes effect if it runs
	# before anything initialises CUDA in this process.
	os.environ["CUDA_VISIBLE_DEVICES"]="0"


	# Inpainting checkpoints offered in the model dropdown; the first entry is
	# the default selection.
	stable_paint_model_list = [
	    "stabilityai/stable-diffusion-2-inpainting",
	    "runwayml/stable-diffusion-inpainting"
	]

	# Example prompts; the first entry pre-fills the prompt textbox.
	stable_paint_prompt_list = [
	    "children running in the forest , sunny, bright, by studio ghibli painting, superior quality, masterpiece, traditional Japanese colors, by Grzegorz Rutkowski, concept art",
	    "A beautiful landscape of a mountain range with a lake in the foreground",
	]

	# Default negative prompt. The first term previously read "lurry", a typo
	# for "blurry" that carried no meaning as a negative prompt term.
	stable_paint_negative_prompt_list = [
	    "blurry, bad art, blurred, text, watermark",
	]

	class StableDiffusionZoomIn:
	    """Generate an "infinite zoom" video with a Stable Diffusion inpainting pipeline.

	    The instance lazily loads a ``DiffusionPipeline`` on the first call and
	    keeps it in ``self.pipe`` so later calls can reuse it.
	    """

	    def __init__(self):
	        # Loaded pipeline, or None until load_model() has run.
	        self.pipe = None
	        # Model id the cached pipeline was built from (None = unknown).
	        self._loaded_model_id = None

	    def load_model(self, model_id):
	        """Return the inpainting pipeline for ``model_id``, loading it if needed.

	        The pipeline is cached on the instance. It is (re)loaded when nothing
	        is loaded yet or when a different ``model_id`` is requested than the
	        one currently cached, so that changing the model in the UI actually
	        takes effect.

	        Args:
	            model_id: Hub id / path handed to ``DiffusionPipeline.from_pretrained``.

	        Returns:
	            The pipeline object, moved to the ``"cuda"`` device.
	        """
	        loaded_id = getattr(self, "_loaded_model_id", None)
	        # A pipeline assigned from outside (loaded_id unknown) is kept as-is.
	        model_changed = loaded_id is not None and loaded_id != model_id

	        if self.pipe is None or model_changed:
	            # Drop the reference to any previous pipeline before loading the
	            # next one so both do not need to be alive at the same time.
	            self.pipe = None

	            pipe = DiffusionPipeline.from_pretrained(
	                model_id, torch_dtype=torch.float16, revision="fp16"
	            )
	            pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
	            pipe = pipe.to("cuda")

	            # Replace the safety checker with the project's `dummy` callable
	            # (presumably a pass-through -- confirm in zoom_in_utils).
	            pipe.safety_checker = dummy
	            pipe.enable_attention_slicing()
	            pipe.enable_xformers_memory_efficient_attention()

	            self.pipe = pipe
	            self._loaded_model_id = model_id
	            self.g_cuda = torch.Generator(device='cuda')

	        return self.pipe

	    @staticmethod
	    def _interpolation_crop_margin(frame_index, num_frames, mask_width, size):
	        """Return the crop margin (pixels per side) for one interpolation frame.

	        Computes ``round((1 - r ** (1 - (frame_index + 1) / num_frames)) * size / 2)``
	        with ``r = 1 - 2 * mask_width / size``. The margin starts close to
	        ``mask_width`` for the first frame and shrinks towards 0.

	        NOTE(review): the operators in this formula were missing from the
	        source; the `**` (geometric interpolation) is a reconstruction --
	        confirm against the original repository.
	        """
	        shrink_ratio = 1 - 2 * mask_width / size
	        exponent = 1 - (frame_index + 1) / num_frames
	        return round((1 - shrink_ratio ** exponent) * size / 2)

	    @staticmethod
	    def _paste_margin(crop_margin, mask_width, size):
	        """Return the border width used when re-pasting the previous frame.

	        Computes ``round((1 - (size - 2*mask_width) / (size - 2*crop_margin)) / 2 * size)``,
	        i.e. the margin that remains around the previous image once the
	        cropped frame has been scaled back up to ``size``.
	        """
	        return round(
	            (1 - (size - 2 * mask_width) / (size - 2 * crop_margin)) / 2 * size
	        )

	    def generate_video(
	        self,
	        model_id,
	        prompt,
	        negative_prompt,
	        guidance_scale,
	        num_inference_steps,
	    ):
	        """Render the zoom video and return the path of the written file.

	        Args:
	            model_id: Inpainting model id passed to ``load_model``.
	            prompt: Text prompt used for every generated image.
	            negative_prompt: Negative prompt used for every generated image.
	            guidance_scale: Guidance scale forwarded to the pipeline.
	            num_inference_steps: Number of denoising steps; coerced to ``int``
	                because UI sliders may deliver a float.

	        Returns:
	            ``"infinite_zoom_out.mp4"``, the path handed to ``write_video``.
	        """
	        pipe = self.load_model(model_id)
	        num_inference_steps = int(num_inference_steps)

	        num_init_images = 2
	        seed = 9999
	        height = 512
	        width = height

	        # Start from an all-zero RGBA canvas: alpha is 0 everywhere, so the
	        # inverted alpha gives an all-255 mask and the whole frame is inpainted.
	        # PIL sizes are (width, height).
	        current_image = Image.new(mode="RGBA", size=(width, height))
	        mask_image = np.array(current_image)[:, :, 3]
	        mask_image = Image.fromarray(255 - mask_image).convert("RGB")
	        current_image = current_image.convert("RGB")

	        init_images = pipe(
	            prompt=[prompt] * num_init_images,
	            negative_prompt=[negative_prompt] * num_init_images,
	            image=current_image,
	            guidance_scale=guidance_scale,
	            height=height,
	            width=width,
	            generator=self.g_cuda.manual_seed(seed),
	            mask_image=mask_image,
	            num_inference_steps=num_inference_steps,
	        )[0]

	        # Result is unused here; the call is kept from the original flow.
	        image_grid(init_images, rows=1, cols=num_init_images)

	        # 1-based choice among the initial images, converted to a list index.
	        init_image_selected = 1  #@param
	        if num_init_images == 1:
	            init_image_selected = 0
	        else:
	            init_image_selected = init_image_selected - 1

	        num_outpainting_steps = 20  #@param
	        mask_width = 128  #@param
	        num_interpol_frames = 30  #@param

	        current_image = init_images[init_image_selected]
	        all_frames = [current_image]

	        for i in range(num_outpainting_steps):
	            print(f"Generating image: {i + 1} / {num_outpainting_steps}")

	            prev_image_fix = current_image

	            # Shrunk copy of the current frame; its alpha channel (index 3)
	            # is read below to build the inpainting mask.
	            prev_image = shrink_and_paste_on_blank(current_image, mask_width)
	            current_image = prev_image

	            # create mask: inverted alpha of the shrunk image
	            mask_image = np.array(current_image)[:, :, 3]
	            mask_image = Image.fromarray(255 - mask_image).convert("RGB")

	            # inpainting step
	            current_image = current_image.convert("RGB")
	            images = pipe(
	                prompt=prompt,
	                negative_prompt=negative_prompt,
	                image=current_image,
	                guidance_scale=guidance_scale,
	                height=height,
	                width=width,
	                # passing generator=self.g_cuda.manual_seed(seed) here would make
	                # the whole thing deterministic but the output less exciting
	                mask_image=mask_image,
	                num_inference_steps=num_inference_steps,
	            )[0]
	            current_image = images[0]
	            # Put the shrunk previous frame back on top, using its own alpha as mask.
	            current_image.paste(prev_image, mask=prev_image)

	            # interpolation steps between 2 inpainted images (=sequential zoom and crop)
	            for j in range(num_interpol_frames - 1):
	                interpol_width = self._interpolation_crop_margin(
	                    j, num_interpol_frames, mask_width, height
	                )
	                interpol_image = current_image.crop((
	                    interpol_width,
	                    interpol_width,
	                    width - interpol_width,
	                    height - interpol_width,
	                ))
	                interpol_image = interpol_image.resize((width, height))

	                # paste the higher resolution previous image in the middle to
	                # avoid drop in quality caused by zooming
	                interpol_width2 = self._paste_margin(interpol_width, mask_width, height)
	                prev_image_fix_crop = shrink_and_paste_on_blank(prev_image_fix, interpol_width2)
	                interpol_image.paste(prev_image_fix_crop, mask=prev_image_fix_crop)

	                all_frames.append(interpol_image)

	            all_frames.append(current_image)

	        video_file_name = "infinite_zoom_out"
	        fps = 30
	        save_path = video_file_name + ".mp4"
	        write_video(save_path, all_frames, fps)
	        return save_path

	    @staticmethod
	    def app():
	        """Create the Gradio controls for this generator and wire up the button.

	        Builds the model/prompt inputs, the two sliders, the trigger button and
	        the video output, then connects the button to ``generate_video`` of a
	        fresh ``StableDiffusionZoomIn`` instance.
	        """
	        # NOTE(review): the nesting of the layout containers below is
	        # reconstructed (inputs in the left column, video in the right one)
	        # -- confirm against the intended layout.
	        with gr.Blocks():
	            with gr.Row():
	                with gr.Column():
	                    text2image_in_model_path = gr.Dropdown(
	                        choices=stable_paint_model_list,
	                        value=stable_paint_model_list[0],
	                        label='Text-Image Model Id'
	                    )

	                    text2image_in_prompt = gr.Textbox(
	                        lines=1,
	                        value=stable_paint_prompt_list[0],
	                        label='Prompt'
	                    )

	                    text2image_in_negative_prompt = gr.Textbox(
	                        lines=1,
	                        value=stable_paint_negative_prompt_list[0],
	                        label='Negative Prompt'
	                    )

	                    with gr.Row():
	                        with gr.Column():
	                            text2image_in_guidance_scale = gr.Slider(
	                                minimum=0.1,
	                                maximum=15,
	                                step=0.1,
	                                value=7.5,
	                                label='Guidance Scale'
	                            )

	                            text2image_in_num_inference_step = gr.Slider(
	                                minimum=1,
	                                maximum=100,
	                                step=1,
	                                value=50,
	                                label='Num Inference Step'
	                            )

	                    text2image_in_predict = gr.Button(value='Generator')

	                with gr.Column():
	                    output_image = gr.Video(label='Output')

	            # Event listeners must be registered inside the Blocks context.
	            text2image_in_predict.click(
	                fn=StableDiffusionZoomIn().generate_video,
	                inputs=[
	                    text2image_in_model_path,
	                    text2image_in_prompt,
	                    text2image_in_negative_prompt,
	                    text2image_in_guidance_scale,
	                    text2image_in_num_inference_step,
	                ],
	                outputs=output_image
	            )


	# Module-level alias so both `StableDiffusionZoomIn.app()` and a plain
	# `app()` call keep working.
	app = StableDiffusionZoomIn.app