Clint Adams committed on
Commit
dbf5021
·
1 Parent(s): 0087f2b

Initial attempt

Browse files
Files changed (3) hide show
  1. README.md +8 -1
  2. app.py +285 -0
  3. requirements.txt +9 -0
README.md CHANGED
@@ -7,6 +7,13 @@ sdk: gradio
7
  sdk_version: 4.26.0
8
  app_file: app.py
9
  pinned: false
 
 
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
7
  sdk_version: 4.26.0
8
  app_file: app.py
9
  pinned: false
10
+ models:
11
+ - stabilityai/stable-diffusion-xl-base-1.0
12
+ - h94/IP-Adapter
13
+ preload_from_hub:
14
+ - stabilityai/stable-diffusion-xl-base-1.0
15
+ - h94/IP-Adapter
16
  ---
17
 
18
+ This demo uses code lifted almost verbatim from
19
+ [Outpainting II - Differential Diffusion](https://huggingface.co/blog/OzzyGT/outpainting-differential-diffusion).
app.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import cv2
3
+ import numpy as np
4
+ import torch
5
+ import gradio as gr
6
+
7
+ from diffusers import DPMSolverMultistepScheduler, StableDiffusionXLPipeline
8
+
9
# Keyword arguments forwarded to the SDXL pipeline loader further down.
# The community differential img2img pipeline is requested by name.
xlp_kwargs = dict(
    custom_pipeline='pipeline_stable_diffusion_xl_differential_img2img',
)

# Run on the GPU in half precision when CUDA is present; otherwise fall back
# to full precision on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device_dtype = torch.float16 if device == 'cuda' else torch.float32

# The fp16 weight variant is only requested together with the CUDA path.
if device == 'cuda':
    xlp_kwargs['variant'] = 'fp16'

xlp_kwargs['torch_dtype'] = device_dtype
22
+
23
+
24
def merge_images(original, new_image, offset, direction):
    """Grow ``original`` by ``offset`` pixels and paste ``new_image`` on the grown edge.

    A black 3-channel ``uint8`` canvas is allocated that is ``offset`` pixels
    wider (for "left"/"right") or taller (for "top"/"bottom") than
    ``original``. ``original`` is written against the edge opposite to
    ``direction`` and ``new_image`` is then written flush with the
    ``direction`` edge, overwriting any region where the two overlap.

    Args:
        original: H x W x 3 array holding the picture accumulated so far.
        new_image: array to paste on the grown side; it must match the canvas
            in the dimension that is not being extended.
        offset: number of pixels the canvas grows by.
        direction: one of "left", "right", "top" or "bottom".

    Returns:
        The merged ``uint8`` array.

    Raises:
        ValueError: if ``direction`` is not one of the four supported values.
    """
    if direction not in ("left", "right", "top", "bottom"):
        # Without this guard the canvas would never be created and the
        # function would die with an UnboundLocalError further down.
        raise ValueError(
            f"direction must be 'left', 'right', 'top' or 'bottom', got {direction!r}")

    height, width = original.shape[:2]
    new_height, new_width = new_image.shape[:2]

    if direction in ("left", "right"):
        merged_image = np.zeros((height, width + offset, 3), dtype=np.uint8)
    else:
        merged_image = np.zeros((height + offset, width, 3), dtype=np.uint8)

    if direction == "left":
        merged_image[:, offset:] = original
        merged_image[:, :new_width] = new_image
    elif direction == "right":
        merged_image[:, :width] = original
        merged_image[:, width + offset - new_width: width + offset] = new_image
    elif direction == "top":
        merged_image[offset:, :] = original
        merged_image[:new_height, :] = new_image
    else:  # "bottom"
        merged_image[:height, :] = original
        merged_image[height + offset - new_height: height + offset, :] = new_image

    return merged_image
47
+
48
+
49
def slice_image(image):
    """Cut six equal square tiles (3 rows x 2 columns) from ``image``.

    The tile edge is ``min(width // 2, height // 3)``, so the 3 x 2 grid is
    anchored at the top-left corner and always fits inside the image; pixels
    beyond the grid (right or bottom remainder) are not included in any tile.
    Because the grid can never overrun the image, no clamping of the last
    row/column is required.

    Args:
        image: array whose first two dimensions are (height, width).

    Returns:
        A list of six tiles in row-major order (top-left, top-right,
        middle-left, ...). Each tile is produced by slicing ``image`` and is
        not copied.
    """
    height, width = image.shape[:2]
    tile = min(width // 2, height // 3)

    return [
        image[row * tile:(row + 1) * tile, col * tile:(col + 1) * tile]
        for row in range(3)
        for col in range(2)
    ]
73
+
74
+
75
def process_image(
    image,
    fill_color=(0, 0, 0),
    mask_offset=50,
    blur_radius=500,
    expand_pixels=256,
    direction="left",
    inpaint_mask_color=50,
    max_size=1024,
):
    """Build the pre-filled canvas and blend map for one outpainting step.

    The canvas is ``expand_pixels`` larger than ``image`` on the ``direction``
    side, capped at ``max_size``; when the cap is hit, ``image`` is cropped
    from the side opposite to ``direction``. The new strip is pre-filled with
    OpenCV Telea inpainting, and a blurred single-channel map is produced that
    holds 255 over the kept picture and ``inpaint_mask_color`` over the new
    strip plus ``mask_offset`` pixels of overlap.

    Args:
        image: H x W x 3 ``uint8`` array; it is passed through a BGR->RGB
            conversion before being returned, so it is treated as BGR.
        fill_color: colour the canvas is initialised with before inpainting.
        mask_offset: how many pixels of the kept picture are also marked with
            ``inpaint_mask_color`` in the map.
        blur_radius: Gaussian kernel size for the map; bumped to the next odd
            number when even.
        expand_pixels: width (or height) of the strip to add.
        direction: "left", "right", "top" or "bottom".
        inpaint_mask_color: map value (0-255) written over the strip.
        max_size: upper bound for the canvas in the growing dimension.

    Returns:
        ``(inpaint, mask)`` on the module-level ``device``: ``inpaint`` is a
        float tensor of shape (1, 3, H, W) scaled to [-1, 1]; ``mask`` is a
        float tensor of shape (1, H, W) scaled to [0, 1].
    """
    height, width = image.shape[:2]

    new_height = height + (expand_pixels if direction in ("top", "bottom") else 0)
    new_width = width + (expand_pixels if direction in ("left", "right") else 0)

    if new_height > max_size:
        # Too tall: drop rows from the side opposite to the growing edge.
        if direction == "top":
            image = image[:max_size, :]
        elif direction == "bottom":
            image = image[new_height - max_size:, :]
        new_height = max_size

    if new_width > max_size:
        # Too wide: drop columns from the side opposite to the growing edge.
        if direction == "left":
            image = image[:, :max_size]
        elif direction == "right":
            image = image[:, new_width - max_size:]
        new_width = max_size

    # Dimensions may have changed because of the crop above.
    height, width = image.shape[:2]

    new_image = np.full((new_height, new_width, 3), fill_color, dtype=np.uint8)
    # Both maps are single-channel; allocate them that way directly instead of
    # building 3-channel arrays and converting them to grayscale.
    mask = np.full((new_height, new_width), 255, dtype=np.uint8)
    inpaint_mask = np.zeros((new_height, new_width), dtype=np.uint8)

    if direction == "left":
        new_image[:, expand_pixels:] = image[:, : max_size - expand_pixels]
        mask[:, : expand_pixels + mask_offset] = inpaint_mask_color
        inpaint_mask[:, :expand_pixels] = 255
    elif direction == "right":
        new_image[:, :width] = image
        # Clamp at 0: a negative start would wrap around and mark only the
        # far edge when the picture is narrower than mask_offset.
        mask[:, max(width - mask_offset, 0):] = inpaint_mask_color
        inpaint_mask[:, width:] = 255
    elif direction == "top":
        new_image[expand_pixels:, :] = image[: max_size - expand_pixels, :]
        mask[: expand_pixels + mask_offset, :] = inpaint_mask_color
        inpaint_mask[:expand_pixels, :] = 255
    elif direction == "bottom":
        new_image[:height, :] = image
        # Same clamp as for "right", applied to rows.
        mask[max(height - mask_offset, 0):, :] = inpaint_mask_color
        inpaint_mask[height:, :] = 255

    # Blur the map; the Gaussian kernel size has to be odd.
    kernel_size = blur_radius + 1 if blur_radius % 2 == 0 else blur_radius
    mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)

    # Pre-fill the new strip with Telea inpainting.
    _, mask_np = cv2.threshold(
        inpaint_mask, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    inpaint = cv2.inpaint(new_image, mask_np, 3, cv2.INPAINT_TELEA)

    # Image -> RGB float tensor in [-1, 1] with a leading batch dimension.
    inpaint = cv2.cvtColor(inpaint, cv2.COLOR_BGR2RGB)
    inpaint = torch.from_numpy(inpaint).permute(2, 0, 1).float()
    inpaint = inpaint / 127.5 - 1
    inpaint = inpaint.unsqueeze(0).to(device)

    # Map -> float tensor in [0, 1] with a leading dimension.
    mask = torch.from_numpy(mask)
    mask = mask.unsqueeze(0).float() / 255.0
    mask = mask.to(device)

    return inpaint, mask
156
+
157
+
158
def image_resize(image, new_size=1024):
    """Resize ``image`` so that its longer side equals ``new_size``.

    The aspect ratio is kept; the shorter side is truncated to an integer.
    A square input becomes ``new_size`` x ``new_size``. Lanczos interpolation
    is used.

    Args:
        image: array whose first two dimensions are (height, width).
        new_size: target length of the longer side, in pixels.

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    height, width = image.shape[:2]
    aspect_ratio = width / height

    if aspect_ratio == 1:
        target = (new_size, new_size)
    elif width > height:
        # Landscape: width is pinned, height follows.
        target = (new_size, int(new_size / aspect_ratio))
    else:
        # Portrait: height is pinned, width follows.
        target = (int(new_size * aspect_ratio), new_size)

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, target, interpolation=cv2.INTER_LANCZOS4)
175
+
176
+
177
# Load the SDXL base checkpoint with the loader options assembled above and
# move it to the selected device.
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", **xlp_kwargs
).to(device)

# Replace the default scheduler with DPM-Solver multistep using Karras sigmas,
# reusing the configuration of the scheduler that came with the checkpoint.
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
    pipeline.scheduler.config,
    use_karras_sigmas=True,
)

# Attach the IP-Adapter weights and their image encoder, then set a low
# adapter scale.
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=["ip-adapter-plus_sdxl_vit-h.safetensors"],
    image_encoder_folder="models/image_encoder",
)
pipeline.set_ip_adapter_scale(0.1)
193
+
194
+
195
def generate_image(prompt, negative_prompt, image, mask, ip_adapter_image, seed: int = None):
    """Run one 1024 x 1024 differential-diffusion pass and return a ``uint8`` image.

    Args:
        prompt: text prompt forwarded to the pipeline.
        negative_prompt: negative text prompt forwarded to the pipeline.
        image: image tensor, passed both as ``original_image`` and ``image``.
        mask: blend map, passed to the pipeline as ``map``.
        ip_adapter_image: IP-Adapter reference images; wrapped in a list
            before being handed to the pipeline.
        seed: seed for the CPU random generator; a random 32-bit value is
            drawn when ``None``.

    Returns:
        The first generated image, multiplied by 255, cast to ``uint8`` and
        with its first and third channels swapped.
    """
    if seed is None:
        seed = random.randint(0, 2**32 - 1)

    # A CPU generator is used regardless of the device the pipeline runs on.
    rng = torch.Generator(device="cpu").manual_seed(seed)

    result = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=1024,
        height=1024,
        guidance_scale=4.0,
        num_inference_steps=25,
        original_image=image,
        image=image,
        strength=1.0,
        map=mask,
        generator=rng,
        ip_adapter_image=[ip_adapter_image],
        output_type="np",
    )

    # presumably floats in [0, 1] for output_type="np" - TODO confirm
    frame = (result.images[0] * 255).astype(np.uint8)

    # The conversion swaps the R and B channels; the other functions in this
    # file treat the result as BGR.
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
221
+
222
+
223
def outpaint(pil_image, direction='right', times_to_expand=4):
    """Outpaint ``pil_image`` towards ``direction`` in repeated 256-pixel steps.

    The picture is resized so its longer side is 1024 pixels, padded to a
    1024 x 1024 square along the outpainting axis and generated once. It is
    then extended ``times_to_expand`` more times, each step adding
    ``expand_pixels`` pixels on the ``direction`` side and merging the newly
    generated frame into the accumulated result.

    Args:
        pil_image: RGB input image (anything ``np.array`` turns into an
            H x W x 3 array).
        direction: "left", "right", "top" or "bottom".
        times_to_expand: number of additional expansion steps.

    Returns:
        The final picture as an RGB ``uint8`` array.
    """
    prompt = ""
    negative_prompt = ""
    inpaint_mask_color = 50  # lighter values use more of the Telea inpainting
    # Keeping this at no more than half of the picture is recommended so that
    # the model still has context to work from.
    expand_pixels = 256

    original = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    image = image_resize(original)

    # Pad along the axis that is being outpainted: width (shape[1]) for
    # left/right, height (shape[0]) for top/bottom.
    axis = 1 if direction in ("left", "right") else 0
    expand_pixels_to_square = 1024 - image.shape[axis]
    image, mask = process_image(
        image,
        expand_pixels=expand_pixels_to_square,
        direction=direction,
        inpaint_mask_color=inpaint_mask_color,
    )

    # Tiles of the source picture act as IP-Adapter references.
    ip_adapter_image = list(slice_image(original))

    generated = generate_image(
        prompt, negative_prompt, image, mask, ip_adapter_image)
    final_image = generated

    for _ in range(int(times_to_expand)):
        image, mask = process_image(
            final_image,
            direction=direction,
            expand_pixels=expand_pixels,
            inpaint_mask_color=inpaint_mask_color,
        )

        # From here on the references come from the most recent frame.
        ip_adapter_image = list(slice_image(generated))

        generated = generate_image(
            prompt, negative_prompt, image, mask, ip_adapter_image)
        # The merge offset must equal the amount the canvas grew by in
        # process_image, so reuse expand_pixels rather than a literal.
        final_image = merge_images(
            final_image, generated, expand_pixels, direction)

    return cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
261
+
262
+
263
# Web UI: one image input, the outpainting direction and the number of
# expansion steps, wired straight into outpaint().
gradio_app = gr.Interface(
    outpaint,
    inputs=[
        gr.Image(
            label="Select start image",
            sources=['upload', 'webcam'],
            type='pil',
        ),
        gr.Radio(
            ["left", "right", "top", 'bottom'],
            label="Direction",
            info="Outward from which edge to paint?",
            value='right',
        ),
        gr.Slider(
            2,
            4,
            step=1,
            value=4,
            label="Times to expand",
            info="Choose between 2 and 4",
        ),
    ],
    outputs=[gr.Image(label="Processed Image")],
    title="Outpainting with differential diffusion demo",
    description='''
# Outpainting with differential diffusion demo
This uses code lifted almost verbatim from
[Outpainting II - Differential Diffusion](https://huggingface.co/blog/OzzyGT/outpainting-differential-diffusion).

If this Space is running on a CPU, it will take hours to get results. You may [duplicate this space](https://huggingface.co/spaces/clinteroni/outpainting-demo?duplicate=true) and pay for an upgraded runtime instead.
''',
)

# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    gradio_app.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ accelerate
2
+ git+https://github.com/huggingface/diffusers.git
3
+ gradio
4
+ numpy
5
+ opencv-python
6
+ pillow
7
+ torch
8
+ torchvision
9
+ transformers