kadirnar committed on
Commit 31f45e8
1 Parent(s): 54948a8

Upload 7 files

app.py CHANGED
@@ -1,5 +1,5 @@
-from inpaint_zoom.zoom_out_app import stable_diffusion_text2img_app
-
+from inpaint_zoom.app.zoom_out_app import stable_diffusion_zoom_out_app
+from inpaint_zoom.app.zoom_in_app import stable_diffusion_zoom_in_app
 
 import gradio as gr
 
@@ -23,8 +23,8 @@ with app:
     with gr.Row():
         with gr.Column():
             with gr.Tab('Zoom Out'):
-                stable_diffusion_text2img_app()
+                stable_diffusion_zoom_out_app()
             with gr.Tab('Zoom In'):
-                pass
+                stable_diffusion_zoom_in_app()
 
 app.launch(debug=True)
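
Note: app.py now pulls each tab's UI from the new inpaint_zoom.app package and wires both tabs to real builders instead of leaving Zoom In as a stub. For orientation, a minimal sketch of the resulting file follows; the app = gr.Blocks() line is an assumption inferred from the `with app:` hunk context, since lines 6-22 of the file are not part of this diff.

import gradio as gr

from inpaint_zoom.app.zoom_out_app import stable_diffusion_zoom_out_app
from inpaint_zoom.app.zoom_in_app import stable_diffusion_zoom_in_app

app = gr.Blocks()  # assumed; the definition is outside the shown hunks
with app:
    with gr.Row():
        with gr.Column():
            with gr.Tab('Zoom Out'):
                stable_diffusion_zoom_out_app()
            with gr.Tab('Zoom In'):
                stable_diffusion_zoom_in_app()

app.launch(debug=True)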
inpaint_zoom/app/__init__.py ADDED
File without changes
inpaint_zoom/app/zoom_in_app.py ADDED
@@ -0,0 +1,195 @@
+from inpaint_zoom.utils.zoom_in_utils import image_grid, shrink_and_paste_on_blank, dummy, write_video
+from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+from PIL import Image
+import gradio as gr
+import numpy as np
+import torch
+import os
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+
+
+stable_paint_model_list = [
+    "stabilityai/stable-diffusion-2-inpainting",
+    "runwayml/stable-diffusion-inpainting"
+]
+
+stable_paint_prompt_list = [
+    "Ancient underground architectural ruins of Hong Kong in a flooded apocalypse landscape of dead skyscrapers",
+    "A beautiful landscape of a mountain range with a lake in the foreground",
+]
+
+stable_paint_negative_prompt_list = [
+    "blurry, bad art, blurred, text, watermark",
+]
+
+
+def stable_diffusion_zoom_in(
+    model_id,
+    prompt,
+    negative_prompt,
+    guidance_scale,
+    num_inference_steps,
+):
+    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16")
+    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+    pipe = pipe.to("cuda")
+
+    pipe.safety_checker = dummy
+    pipe.enable_attention_slicing()
+    g_cuda = torch.Generator(device="cuda")
+
+    num_init_images = 2
+    seed = 9999
+    height = 512
+    width = height
+
+    current_image = Image.new(mode="RGBA", size=(height, width))
+    mask_image = np.array(current_image)[:, :, 3]
+    mask_image = Image.fromarray(255 - mask_image).convert("RGB")
+    current_image = current_image.convert("RGB")
+
+    init_images = pipe(
+        prompt=[prompt] * num_init_images,
+        negative_prompt=[negative_prompt] * num_init_images,
+        image=current_image,
+        guidance_scale=guidance_scale,
+        height=height,
+        width=width,
+        generator=g_cuda.manual_seed(seed),
+        mask_image=mask_image,
+        num_inference_steps=num_inference_steps,
+    )[0]
+
+    image_grid(init_images, rows=1, cols=num_init_images)
+
+    init_image_selected = 1  #@param
+    if num_init_images == 1:
+        init_image_selected = 0
+    else:
+        init_image_selected = init_image_selected - 1
+
+    num_outpainting_steps = 20  #@param
+    mask_width = 128  #@param
+    num_interpol_frames = 30  #@param
+
+    current_image = init_images[init_image_selected]
+    all_frames = []
+    all_frames.append(current_image)
+
+    for i in range(num_outpainting_steps):
+        print("Generating image: " + str(i + 1) + " / " + str(num_outpainting_steps))
+
+        prev_image_fix = current_image
+
+        prev_image = shrink_and_paste_on_blank(current_image, mask_width)
+
+        current_image = prev_image
+
+        # create mask (black image with white edges, mask_width pixels wide)
+        mask_image = np.array(current_image)[:, :, 3]
+        mask_image = Image.fromarray(255 - mask_image).convert("RGB")
+
+        # inpainting step
+        current_image = current_image.convert("RGB")
+        images = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            image=current_image,
+            guidance_scale=guidance_scale,
+            height=height,
+            width=width,
+            # this can make the whole thing deterministic but the output less exciting
+            # generator=g_cuda.manual_seed(seed),
+            mask_image=mask_image,
+            num_inference_steps=num_inference_steps,
+        )[0]
+        current_image = images[0]
+        current_image.paste(prev_image, mask=prev_image)
+
+        # interpolation steps between 2 inpainted images (= sequential zoom and crop)
+        for j in range(num_interpol_frames - 1):
+            interpol_image = current_image
+            interpol_width = round(
+                (1 - (1 - 2 * mask_width / height) ** (1 - (j + 1) / num_interpol_frames)) * height / 2
+            )
+            interpol_image = interpol_image.crop((
+                interpol_width,
+                interpol_width,
+                width - interpol_width,
+                height - interpol_width,
+            ))
+
+            interpol_image = interpol_image.resize((height, width))
+
+            # paste the higher-resolution previous image in the middle to avoid the drop in quality caused by zooming
+            interpol_width2 = round(
+                (1 - (height - 2 * mask_width) / (height - 2 * interpol_width)) / 2 * height
+            )
+            prev_image_fix_crop = shrink_and_paste_on_blank(prev_image_fix, interpol_width2)
+            interpol_image.paste(prev_image_fix_crop, mask=prev_image_fix_crop)
+
+            all_frames.append(interpol_image)
+
+        all_frames.append(current_image)
+
+    video_file_name = "infinite_zoom_in"
+    fps = 30
+    save_path = video_file_name + ".mp4"
+    write_video(save_path, all_frames, fps)
+    return save_path
+
+
+def stable_diffusion_zoom_in_app():
+    with gr.Blocks():
+        with gr.Row():
+            with gr.Column():
+                text2image_in_model_path = gr.Dropdown(
+                    choices=stable_paint_model_list,
+                    value=stable_paint_model_list[0],
+                    label='Inpainting Model Id'
+                )
+
+                text2image_in_prompt = gr.Textbox(
+                    lines=1,
+                    value=stable_paint_prompt_list[0],
+                    label='Prompt'
+                )
+
+                text2image_in_negative_prompt = gr.Textbox(
+                    lines=1,
+                    value=stable_paint_negative_prompt_list[0],
+                    label='Negative Prompt'
+                )
+
+                with gr.Accordion("Advanced Options", open=False):
+                    text2image_in_guidance_scale = gr.Slider(
+                        minimum=0.1,
+                        maximum=15,
+                        step=0.1,
+                        value=7.5,
+                        label='Guidance Scale'
+                    )
+
+                    text2image_in_num_inference_step = gr.Slider(
+                        minimum=1,
+                        maximum=100,
+                        step=1,
+                        value=50,
+                        label='Num Inference Steps'
+                    )
+
+                text2image_in_predict = gr.Button(value='Generate')
+
+            with gr.Column():
+                output_image = gr.Video(label='Output')
+
+        text2image_in_predict.click(
+            fn=stable_diffusion_zoom_in,
+            inputs=[
+                text2image_in_model_path,
+                text2image_in_prompt,
+                text2image_in_negative_prompt,
+                text2image_in_guidance_scale,
+                text2image_in_num_inference_step,
+            ],
+            outputs=output_image
+        )
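
Note: the interpolation loop above crops a centered window out of each newly outpainted frame and resizes it back to 512x512, so each outpainting step contributes num_interpol_frames video frames. The exponential schedule keeps the per-frame zoom factor constant: the crop size is height * (1 - 2*mask_width/height) ** (1 - (j+1)/num_interpol_frames), a geometric progression in j. A standalone sketch of the schedule, using the in-code defaults:

height = 512
mask_width = 128
num_interpol_frames = 30

for j in range(num_interpol_frames - 1):
    # same expression as in the loop above
    interpol_width = round(
        (1 - (1 - 2 * mask_width / height) ** (1 - (j + 1) / num_interpol_frames)) * height / 2
    )
    # j = 0 crops close to the inner (height - 2*mask_width) core (~125 px margins);
    # by j = 28 the margins shrink to ~6 px, i.e. nearly the full frame
    print(j, interpol_width)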
inpaint_zoom/app/zoom_out_app.py ADDED
@@ -0,0 +1,144 @@
+from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+from inpaint_zoom.utils.zoom_out_utils import preprocess_image, preprocess_mask_image, write_video, dummy
+from PIL import Image
+import gradio as gr
+import torch
+import os
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+
+
+stable_paint_model_list = [
+    "stabilityai/stable-diffusion-2-inpainting",
+    "runwayml/stable-diffusion-inpainting"
+]
+
+stable_paint_prompt_list = [
+    "Ancient underground architectural ruins of Hong Kong in a flooded apocalypse landscape of dead skyscrapers",
+    "A beautiful landscape of a mountain range with a lake in the foreground",
+]
+
+stable_paint_negative_prompt_list = [
+    "blurry, bad art, blurred, text, watermark",
+]
+
+
+def stable_diffusion_zoom_out(
+    model_id,
+    original_prompt,
+    negative_prompt,
+    guidance_scale,
+    num_inference_steps,
+    step_size,
+    num_frames,
+):
+    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+    pipe.set_use_memory_efficient_attention_xformers(True)
+    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+    pipe = pipe.to("cuda")
+    pipe.safety_checker = dummy
+
+    new_image = Image.new(mode="RGBA", size=(512, 512))
+    current_image, mask_image = preprocess_mask_image(new_image)
+
+    current_image = pipe(
+        prompt=[original_prompt],
+        negative_prompt=[negative_prompt],
+        image=current_image,
+        mask_image=mask_image,
+        num_inference_steps=num_inference_steps,
+        guidance_scale=guidance_scale
+    ).images[0]
+
+    all_frames = []
+    all_frames.append(current_image)
+
+    for i in range(num_frames):
+        prev_image = preprocess_image(current_image, step_size, 512)
+        current_image = prev_image
+        current_image, mask_image = preprocess_mask_image(current_image)
+        current_image = pipe(
+            prompt=[original_prompt],
+            negative_prompt=[negative_prompt],
+            image=current_image,
+            mask_image=mask_image,
+            num_inference_steps=num_inference_steps,
+        ).images[0]
+
+        current_image.paste(prev_image, mask=prev_image)
+        all_frames.append(current_image)
+
+    save_path = "output.mp4"
+    write_video(save_path, all_frames, fps=30)
+    return save_path
+
+
+def stable_diffusion_zoom_out_app():
+    with gr.Blocks():
+        with gr.Row():
+            with gr.Column():
+                text2image_out_model_path = gr.Dropdown(
+                    choices=stable_paint_model_list,
+                    value=stable_paint_model_list[0],
+                    label='Inpainting Model Id'
+                )
+
+                text2image_out_prompt = gr.Textbox(
+                    lines=1,
+                    value=stable_paint_prompt_list[0],
+                    label='Prompt'
+                )
+
+                text2image_out_negative_prompt = gr.Textbox(
+                    lines=1,
+                    value=stable_paint_negative_prompt_list[0],
+                    label='Negative Prompt'
+                )
+
+                with gr.Accordion("Advanced Options", open=False):
+                    text2image_out_guidance_scale = gr.Slider(
+                        minimum=0.1,
+                        maximum=15,
+                        step=0.1,
+                        value=7.5,
+                        label='Guidance Scale'
+                    )
+
+                    text2image_out_num_inference_step = gr.Slider(
+                        minimum=1,
+                        maximum=100,
+                        step=1,
+                        value=50,
+                        label='Num Inference Steps'
+                    )
+
+                    text2image_out_step_size = gr.Slider(
+                        minimum=1,
+                        maximum=100,
+                        step=1,
+                        value=10,
+                        label='Step Size'
+                    )
+
+                    text2image_out_num_frames = gr.Slider(
+                        minimum=1,
+                        maximum=100,
+                        step=1,
+                        value=10,
+                        label='Frames'
+                    )
+
+                text2image_out_predict = gr.Button(value='Generate')
+
+            with gr.Column():
+                output_image = gr.Video(label="Output Video")
+
+        text2image_out_predict.click(
+            fn=stable_diffusion_zoom_out,
+            inputs=[
+                text2image_out_model_path,
+                text2image_out_prompt,
+                text2image_out_negative_prompt,
+                text2image_out_guidance_scale,
+                text2image_out_num_inference_step,
+                text2image_out_step_size,
+                text2image_out_num_frames,
+            ],
+            outputs=output_image
+        )
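
Note: stable_diffusion_zoom_out can also be driven without the Gradio UI. A hypothetical headless call is sketched below; the argument values are illustrative, and a CUDA GPU plus xformers are required by the pipeline setup inside the function.

from inpaint_zoom.app.zoom_out_app import stable_diffusion_zoom_out

video_path = stable_diffusion_zoom_out(
    model_id="stabilityai/stable-diffusion-2-inpainting",
    original_prompt="A beautiful landscape of a mountain range with a lake in the foreground",
    negative_prompt="blurry, bad art, blurred, text, watermark",
    guidance_scale=7.5,
    num_inference_steps=50,
    step_size=10,
    num_frames=10,
)
print(video_path)  # "output.mp4"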
inpaint_zoom/utils/__init__.py ADDED
File without changes
inpaint_zoom/utils/zoom_in_utils.py ADDED
@@ -0,0 +1,69 @@
+from PIL import Image
+import numpy as np
+import cv2
+import os
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+
+
+def write_video(file_path, frames, fps, reversed=True):
+    """
+    Writes frames to an mp4 video file.
+    :param file_path: Path to output video, must end with .mp4
+    :param frames: List of PIL.Image objects
+    :param fps: Desired frame rate
+    :param reversed: whether to reverse the order of the frames (default: True)
+    """
+    if reversed:
+        frames.reverse()
+
+    w, h = frames[0].size
+    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
+    # fourcc = cv2.VideoWriter_fourcc(*'avc1')
+    writer = cv2.VideoWriter(file_path, fourcc, fps, (w, h))
+
+    for frame in frames:
+        np_frame = np.array(frame.convert('RGB'))
+        cv_frame = cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR)
+        writer.write(cv_frame)
+
+    writer.release()
+
+
+def image_grid(imgs, rows, cols):
+    assert len(imgs) == rows * cols
+
+    w, h = imgs[0].size
+    grid = Image.new('RGB', size=(cols * w, rows * h))
+
+    for i, img in enumerate(imgs):
+        grid.paste(img, box=(i % cols * w, i // cols * h))
+    return grid
+
+
+def shrink_and_paste_on_blank(current_image, mask_width):
+    """
+    Decreases the size of current_image by mask_width pixels on each side,
+    then adds a mask_width-wide transparent frame, so that the returned
+    image has the same size as the input.
+    :param current_image: input image to transform
+    :param mask_width: width in pixels to shrink from each side
+    """
+    height = current_image.height
+    width = current_image.width
+
+    # shrink down by mask_width
+    prev_image = current_image.resize((height - 2 * mask_width, width - 2 * mask_width))
+    prev_image = prev_image.convert("RGBA")
+    prev_image = np.array(prev_image)
+
+    # create a blank, non-transparent image
+    blank_image = np.array(current_image.convert("RGBA")) * 0
+    blank_image[:, :, 3] = 1
+
+    # paste the shrunken image onto the blank canvas
+    blank_image[mask_width:height - mask_width, mask_width:width - mask_width, :] = prev_image
+    prev_image = Image.fromarray(blank_image)
+
+    return prev_image
+
+
+def dummy(images, **kwargs):
+    return images, False
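
Note: shrink_and_paste_on_blank is the geometric core of the zoom-in loop. The border alpha of 1 becomes 254 (near-white) in the 255 - alpha mask, marking that ring for inpainting, while the opaque center (alpha 255) maps to 0 (black, keep). A quick sanity check of this behavior, assuming only Pillow and numpy:

from PIL import Image
import numpy as np

from inpaint_zoom.utils.zoom_in_utils import shrink_and_paste_on_blank

img = Image.new("RGB", (512, 512), (255, 0, 0))
out = shrink_and_paste_on_blank(img, mask_width=128)

alpha = np.array(out)[:, :, 3]
print(alpha[0, 0])      # 1   -> border: 255 - 1 = 254 in the derived mask (inpaint)
print(alpha[256, 256])  # 255 -> center: 255 - 255 = 0 in the mask (keep)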
inpaint_zoom/utils/zoom_out_utils.py ADDED
@@ -0,0 +1,45 @@
+import numpy as np
+import cv2
+from PIL import Image
+
+
+def write_video(file_path, frames, fps):
+    """
+    Writes frames to an mp4 video file.
+    :param file_path: Path to output video, must end with .mp4
+    :param frames: List of PIL.Image objects
+    :param fps: Desired frame rate
+    """
+    w, h = frames[0].size
+    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
+    writer = cv2.VideoWriter(file_path, fourcc, fps, (w, h))
+
+    for frame in frames:
+        np_frame = np.array(frame.convert('RGB'))
+        cv_frame = cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR)
+        writer.write(cv_frame)
+
+    writer.release()
+
+
+def dummy(images, **kwargs):
+    return images, False
+
+
+def preprocess_image(current_image, steps, image_size):
+    # shrink the current image by `steps` pixels per side and center it on a
+    # blank canvas whose border alpha marks the region to outpaint
+    next_image = np.array(current_image.convert("RGBA")) * 0
+    prev_image = current_image.resize((image_size - 2 * steps, image_size - 2 * steps))
+    prev_image = prev_image.convert("RGBA")
+    prev_image = np.array(prev_image)
+    next_image[:, :, 3] = 1
+    next_image[steps:image_size - steps, steps:image_size - steps, :] = prev_image
+    prev_image = Image.fromarray(next_image)
+
+    return prev_image
+
+
+def preprocess_mask_image(current_image):
+    mask_image = np.array(current_image)[:, :, 3]  # assume the image has an alpha channel (check .mode == "RGBA")
+    mask_image = Image.fromarray(255 - mask_image).convert("RGB")
+    current_image = current_image.convert("RGB")
+
+    return current_image, mask_image
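
Note: the two helpers are designed to be chained once per zoom-out step: preprocess_image shrinks the current frame onto a blank RGBA canvas with border alpha 1, and preprocess_mask_image converts that alpha channel into the inpainting mask. A short sketch of the round trip, with illustrative values:

from PIL import Image
import numpy as np

from inpaint_zoom.utils.zoom_out_utils import preprocess_image, preprocess_mask_image

frame = Image.new("RGB", (512, 512), (0, 128, 255))
shrunk = preprocess_image(frame, steps=10, image_size=512)  # 512x512 RGBA
rgb, mask = preprocess_mask_image(shrunk)

print(np.array(mask)[0, 0])      # [254 254 254] -> near-white border, to be outpainted
print(np.array(mask)[256, 256])  # [0 0 0]       -> black center, kept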