Spaces: Running on A10G
update
app.py CHANGED
@@ -78,10 +78,11 @@ def inference_single_image(ref_image,
                            strength,
                            ddim_steps,
                            scale,
-                           seed,
+                           seed,
+                           enable_shape_control
                            ):
     raw_background = tar_image.copy()
-    item = process_pairs(ref_image, ref_mask, tar_image, tar_mask)
+    item = process_pairs(ref_image, ref_mask, tar_image, tar_mask, enable_shape_control = enable_shape_control)
 
     ref = item['ref']
     hint = item['hint']
@@ -133,7 +134,7 @@ def inference_single_image(ref_image,
     return raw_background
 
 
-def process_pairs(ref_image, ref_mask, tar_image, tar_mask, max_ratio = 0.8):
+def process_pairs(ref_image, ref_mask, tar_image, tar_mask, max_ratio = 0.8, enable_shape_control = False):
     # ========= Reference ===========
     # ref expand
     ref_box_yyxx = get_bbox_from_mask(ref_mask)
@@ -189,21 +190,23 @@ def process_pairs(ref_image, ref_mask, tar_image, tar_mask, max_ratio = 0.8):
 
     collage_mask = cropped_target_image.copy() * 0.0
     collage_mask[y1:y2,x1:x2,:] = 1.0
-
+    if enable_shape_control:
+        collage_mask = np.stack([cropped_tar_mask,cropped_tar_mask,cropped_tar_mask],-1)
 
     # the size before pad
     H1, W1 = collage.shape[0], collage.shape[1]
 
     cropped_target_image = pad_to_square(cropped_target_image, pad_value = 0, random = False).astype(np.uint8)
     collage = pad_to_square(collage, pad_value = 0, random = False).astype(np.uint8)
-    collage_mask = pad_to_square(collage_mask, pad_value =
+    collage_mask = pad_to_square(collage_mask, pad_value = 2, random = False).astype(np.uint8)
 
     # the size after pad
     H2, W2 = collage.shape[0], collage.shape[1]
 
     cropped_target_image = cv2.resize(cropped_target_image.astype(np.uint8), (512,512)).astype(np.float32)
     collage = cv2.resize(collage.astype(np.uint8), (512,512)).astype(np.float32)
-    collage_mask =
+    collage_mask = cv2.resize(collage_mask.astype(np.uint8), (512,512), interpolation = cv2.INTER_NEAREST).astype(np.float32)
+    collage_mask[collage_mask == 2] = -1
 
     masked_ref_image = masked_ref_image / 255
     cropped_target_image = cropped_target_image / 127.5 - 1.0
@@ -225,13 +228,6 @@ ref_list.sort()
 image_list=[os.path.join(image_dir,file) for file in os.listdir(image_dir) if '.jpg' in file or '.png' in file or '.jpeg' in file]
 image_list.sort()
 
-def process_image_mask(image_np, mask_np):
-    img = torch.from_numpy(image_np.transpose((2, 0, 1)))
-    img_ten = img.float().div(255).unsqueeze(0)
-    mask_ten = torch.from_numpy(mask_np).float().unsqueeze(0).unsqueeze(0)
-    return img_ten, mask_ten
-
-
 def mask_image(image, mask):
     blanc = np.ones_like(image) * 255
     mask = np.stack([mask,mask,mask],-1) / 255
@@ -247,49 +243,38 @@ def run_local(base,
     ref_mask = ref["mask"].convert("L")
     image = np.asarray(image)
     mask = np.asarray(mask)
-    mask = np.where(mask > 128,
+    mask = np.where(mask > 128, 1, 0).astype(np.uint8)
     ref_image = np.asarray(ref_image)
     ref_mask = np.asarray(ref_mask)
     ref_mask = np.where(ref_mask > 128, 1, 0).astype(np.uint8)
 
-    # refine the user annotated coarse mask
-    if use_interactive_seg:
-        img_ten, mask_ten = process_image_mask(ref_image, ref_mask)
-        ref_mask = iseg_model(img_ten, mask_ten)['instances'][0,0].detach().numpy() > 0.5
-
-    processed_item = process_pairs(ref_image.copy(), ref_mask.copy(), image.copy(), mask.copy(), max_ratio = 0.8)
-    masked_ref = (processed_item['ref']*255)
-
-    mased_image = mask_image(image, mask)
-    #synthesis = image
     synthesis = inference_single_image(ref_image.copy(), ref_mask.copy(), image.copy(), mask.copy(), *args)
     synthesis = torch.from_numpy(synthesis).permute(2, 0, 1)
     synthesis = synthesis.permute(1, 2, 0).numpy()
-
-    masked_ref = cv2.resize(masked_ref.astype(np.uint8), (512,512))
     return [synthesis]
 
+
+
 with gr.Blocks() as demo:
     with gr.Column():
         gr.Markdown("# Play with AnyDoor to Teleport your Target Objects! ")
         with gr.Row():
             baseline_gallery = gr.Gallery(label='Output', show_label=True, elem_id="gallery", columns=1, height=768)
         with gr.Accordion("Advanced Option", open=True):
-            #num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
             num_samples = 1
             strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
             ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=30, step=1)
             scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=4.5, step=0.1)
             seed = gr.Slider(label="Seed", minimum=-1, maximum=999999999, step=1, value=-1)
+            enable_shape_control = gr.Checkbox(label='Enable Shape Control', value=False)
            gr.Markdown(" Higher guidance-scale makes higher fidelity, while lower guidance-scale leads to more harmonized blending.")
 
-
        gr.Markdown("# Upload / Select Images for the Background (left) and Reference Object (right)")
        gr.Markdown("### Your could draw coarse masks on the background to indicate the desired location and shape.")
        gr.Markdown("### <u>Do not forget</u> to annotate the target object on the reference image.")
        with gr.Row():
-            base = gr.Image(label="Background", tool="sketch", type="pil", height=512, brush_color='#FFFFFF', mask_opacity=0.5)
-            ref = gr.Image(label="Reference", tool="sketch", type="pil", height=512, brush_color='#FFFFFF', mask_opacity=0.5)
+            base = gr.Image(label="Background", source="upload", tool="sketch", type="pil", height=512, brush_color='#FFFFFF', mask_opacity=0.5)
+            ref = gr.Image(label="Reference", source="upload", tool="sketch", type="pil", height=512, brush_color='#FFFFFF', mask_opacity=0.5)
        run_local_button = gr.Button(label="Generate", value="Run")
 
        with gr.Row():
@@ -304,7 +289,8 @@ with gr.Blocks() as demo:
                 strength,
                 ddim_steps,
                 scale,
-                seed,
+                seed,
+                enable_shape_control,
                 ],
         outputs=[baseline_gallery]
         )
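The least obvious part of this change is the new collage_mask handling in process_pairs: the mask is padded with a sentinel value of 2 rather than a negative value (a negative pad value would wrap around under the later .astype(np.uint8) cast), resized with nearest-neighbour interpolation so the values stay exactly 0, 1 and 2, and only then has the sentinel mapped back to -1 for the padded region. The following is a minimal, self-contained sketch of that idea, not the Space's code; the pad_to_square helper below is a simplified, hypothetical stand-in that only mirrors the call signature used in the diff.

import cv2
import numpy as np

def pad_to_square(img, pad_value, random=False):
    # Hypothetical stand-in for the helper called in app.py: pad the shorter
    # side with pad_value so the array becomes square (placement simplified).
    h, w = img.shape[:2]
    size = max(h, w)
    out = np.full((size, size, img.shape[2]), pad_value, dtype=img.dtype)
    out[:h, :w] = img
    return out

# Toy 3-channel mask: 1.0 inside the user-drawn box, 0.0 elsewhere.
collage_mask = np.zeros((60, 100, 3), dtype=np.float32)
collage_mask[20:40, 30:70, :] = 1.0

# Pad with the sentinel 2 (unlike -1, it survives the uint8 cast), resize with
# nearest-neighbour so the values stay exactly 0/1/2, then map 2 back to -1
# to mark the padded region outside the original crop.
collage_mask = pad_to_square(collage_mask, pad_value=2, random=False).astype(np.uint8)
collage_mask = cv2.resize(collage_mask, (512, 512), interpolation=cv2.INTER_NEAREST).astype(np.float32)
collage_mask[collage_mask == 2] = -1

print(np.unique(collage_mask))  # [-1.  0.  1.]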