File size: 2,922 Bytes
95e4531
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from utils import write_video, dummy
from PIL import Image
import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
import gradio as gr


# Single-slot cache: repeated requests for the same model reuse the loaded
# pipeline instead of rebuilding it and moving it to the GPU on every call.
_PIPELINE_CACHE = {}


def _load_pipeline(repo_id):
  """Return the configured inpainting pipeline for repo_id, loading it on first use."""
  if repo_id not in _PIPELINE_CACHE:
    # Drop any previously cached model first so at most one pipeline is kept.
    _PIPELINE_CACHE.clear()
    pipe = DiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch.float16, revision="fp16")
    pipe.set_use_memory_efficient_attention_xformers(True)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    pipe = pipe.to("cuda")
    pipe.safety_checker = dummy
    _PIPELINE_CACHE[repo_id] = pipe
  return _PIPELINE_CACHE[repo_id]


def _mask_from_alpha(rgba_image):
  """Return an RGB mask image whose value is 255 minus the alpha channel of rgba_image."""
  alpha = np.array(rgba_image)[:, :, 3]
  return Image.fromarray(255 - alpha).convert("RGB")


def stable_diffusion_zoom_out(
  repo_id="stabilityai/stable-diffusion-2-inpainting",
  original_prompt="a dog",
  negative_prompt="a cat",
  steps=32,
  num_frames=10,
):
  """Render a zoom-out video by repeatedly shrinking a frame and inpainting its border.

  The first frame is generated from a fully masked 512x512 canvas. Each later
  frame shrinks the previous one by ``steps`` pixels on every side, centres it
  on a blank canvas, inpaints the surrounding border and pastes the shrunken
  frame back on top of the result.

  Args:
    repo_id: Model id passed to ``DiffusionPipeline.from_pretrained``. An empty
      value falls back to the default inpainting model.
    original_prompt: Prompt used for every frame.
    negative_prompt: Negative prompt used for every frame.
    steps: Border width in pixels added on each side per frame. This is not
      the number of denoising steps, which is fixed at 25. Truncated to int.
    num_frames: Number of zoomed-out frames generated after the first one.
      Truncated to int.

  Returns:
    The path handed to ``write_video``: ``"infinite_zoom_out.mp4"``.

  Raises:
    ValueError: If ``steps`` is negative or leaves no room for the shrunken
      image inside the 512-pixel canvas.
  """
  size = 512

  # UI sliders may deliver floats, which resize(), slice bounds and range()
  # all reject, so normalise to int before any use.
  steps = int(steps)
  num_frames = int(num_frames)
  if steps < 0 or 2 * steps >= size:
    raise ValueError(
      f"steps must satisfy 0 <= steps < {size // 2}, got {steps}"
    )
  # Presumably the dropdown sends None/empty when nothing is selected; use the default then.
  repo_id = repo_id or "stabilityai/stable-diffusion-2-inpainting"

  pipe = _load_pipeline(repo_id)

  num_images = 1
  prompts = [original_prompt] * num_images
  negative_prompts = [negative_prompt] * num_images

  # A new RGBA image is fully transparent (alpha 0), so the mask is 255
  # everywhere and the whole first frame gets generated.
  blank = Image.new(mode="RGBA", size=(size, size))
  images = pipe(
    prompt=prompts,
    negative_prompt=negative_prompts,
    image=blank.convert("RGB"),
    mask_image=_mask_from_alpha(blank),
    num_inference_steps=25,
  )[0]
  current_image = images[0]

  all_frames = [current_image]
  inner = size - 2 * steps

  for _ in range(num_frames):
    shrunk = np.array(current_image.resize((inner, inner)).convert("RGBA"))

    # Blank canvas with alpha 1 in the border; the centre takes the shrunken
    # frame, whose alpha is 255 after the RGB -> RGBA conversion.
    canvas = np.zeros((size, size, 4), dtype=np.uint8)
    canvas[:, :, 3] = 1
    canvas[steps:size - steps, steps:size - steps, :] = shrunk
    framed = Image.fromarray(canvas)

    # Border -> mask 254 (repaint), centre -> mask 0 (keep).
    images = pipe(
      prompt=prompts,
      negative_prompt=negative_prompts,
      image=framed.convert("RGB"),
      mask_image=_mask_from_alpha(framed),
      num_inference_steps=25,
    )[0]
    current_image = images[0]
    # Restore the untouched centre on top of the pipeline output, using the
    # canvas alpha as the paste mask.
    current_image.paste(framed, mask=framed)
    all_frames.append(current_image)

  save_path = "infinite_zoom_out.mp4"
  write_video(save_path, all_frames, fps=16)
  return save_path

# UI controls, listed in the same order as the parameters of
# stable_diffusion_zoom_out.
inputs = [
  # Preselect the only available model so the callback never receives an
  # empty selection.
  gr.Dropdown(
    ["stabilityai/stable-diffusion-2-inpainting"],
    value="stabilityai/stable-diffusion-2-inpainting",
    label="Model",
  ),
  gr.inputs.Textbox(lines=1, default="a dog", label="Prompt"),
  gr.inputs.Textbox(lines=1, default="a cat", label="Negative Prompt"),
  # step=1 keeps both sliders on whole numbers: their values are used as a
  # pixel offset and as a loop count.
  gr.inputs.Slider(minimum=1, maximum=64, step=1, default=32, label="Steps"),
  gr.inputs.Slider(minimum=1, maximum=100, step=1, default=10, label="Frames"),
]
output = gr.outputs.Video()
title = "Stable Diffusion Infinite Zoom Out"

# Wire the generator function to the controls above and show the returned
# video file path in a video player.
demo_app = gr.Interface(
    fn=stable_diffusion_zoom_out,
    inputs=inputs,
    outputs=output,
    title=title,
    theme='huggingface',
)
demo_app.launch(debug=True, enable_queue=True)