Alexander McKinney committed on
Commit 8cd1abb
1 Parent(s): 7d008e4

fixes bug loading new image with different masks and cleans up code

Files changed (2):
  1. README.md +1 -0
  2. app.py +42 -133
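
The substantive fix is the new input_image.change handler in the app.py hunk below: checkbox choices produced by fn_segmentation refer to segment ids of the image they were computed from, so the CheckboxGroup is cleared whenever a new image is loaded, before any stale index can be applied to the wrong masks. A minimal sketch of that pattern in isolation (gradio 3.x update API; component names here are illustrative, not the app's):

import gradio as gr

with gr.Blocks() as demo:
    image = gr.Image(type="pil")
    segment_boxes = gr.CheckboxGroup(interactive=True)

    # A newly uploaded image invalidates every previously offered choice,
    # so reset both the available choices and the current selection.
    image.change(
        lambda: gr.CheckboxGroup.update(choices=[], value=[]),
        outputs=segment_boxes,
    )

demo.launch()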
README.md CHANGED
@@ -16,3 +16,4 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
 - is there a way to stop the loading icon appearing? Would rather copy last input than flicker
 - onclick events for canvas? we can draw, but can I get coordinates?
 - checkboxes seem a bit busted with indexes
+- set canvas default to segmentation output, make small edits rather than doing whole thing
app.py CHANGED
@@ -4,7 +4,7 @@ import numpy as np
 import torch
 from PIL import Image
 from skimage.measure import block_reduce
-from typing import List
+from typing import List, Optional
 from functools import reduce
 
 import gradio as gr
@@ -14,15 +14,6 @@ from transformers.models.detr.feature_extraction_detr import rgb_to_id
 
 from diffusers import StableDiffusionInpaintPipeline
 
-# TODO: maybe need to port to `Blocks` system
-# allegedly provides:
-# Have multi-step interfaces, in which the output of one model becomes the
-# input to the next model, or have more flexible data flows in general.
-
-# and:
-# Change a component’s properties (for example, the choices in a dropdown) or its visibility based on user input
-# https://huggingface.co/course/chapter9/7?fw=pt
-
 torch.inference_mode()
 torch.no_grad()
 
@@ -61,7 +52,6 @@ def clean_mask(mask, max_kernel: int = 23, min_kernel: int = 5):
 device = get_device()
 
 feature_extractor, segmentation_model, segmentation_cfg = load_segmentation_models()
-# segmentation_model = segmentation_model.to(device)
 
 pipe = load_diffusion_pipeline()
 pipe = pipe.to(device)
@@ -83,7 +73,6 @@ def fn_segmentation(image, max_kernel, min_kernel):
         m = panoptic_seg_id == s['id']
         raw_masks.append(m.astype(np.uint8) * 255)
 
-    # masks = fn_clean(raw_masks, max_kernel, min_kernel)
     checkbox_choices = [f"{s['id']}:{segmentation_cfg.id2label[s['category_id']]}" for s in result['segments_info']]
 
     checkbox_group = gr.CheckboxGroup.update(
@@ -119,7 +108,16 @@ def fn_update_mask(
 
     return combined_mask.astype(np.uint8) * 255, Image.fromarray(masked_image)
 
-def fn_diffusion(prompt: str, masked_image: Image, mask: Image, num_diffusion_steps: int):
+def fn_diffusion(
+    prompt: str,
+    masked_image: Image,
+    mask: Image,
+    num_diffusion_steps: int,
+    guidance_scale: float,
+    negative_prompt: Optional[str] = None,
+):
+    if len(negative_prompt) == 0:
+        negative_prompt = None
     STABLE_DIFFUSION_SMALL_EDGE = 512
 
     w, h = masked_image.size
@@ -141,151 +139,62 @@ def fn_diffusion(prompt: str, masked_image: Image, mask: Image, num_diffusion_st
         prompt=prompt,
         image=masked_image,
         mask_image=mask,
-        num_inference_steps=num_diffusion_steps
+        num_inference_steps=num_diffusion_steps,
+        guidance_scale=guidance_scale,
+        negative_prompt=negative_prompt
     ).images[0]
 
     inpainted_image = inpainted_image.resize((w, h))
 
     return inpainted_image
 
-def fn_segmentation_diffusion(prompt, mask_indices, image, max_kernel, min_kernel, num_diffusion_steps):
-    mask_indices = [int(i) for i in mask_indices.split(',')]
-    inputs = feature_extractor(images=image, return_tensors="pt")
-    outputs = segmentation_model(**inputs)
-
-    processed_sizes = torch.as_tensor(inputs["pixel_values"].shape[-2:]).unsqueeze(0)
-    result = feature_extractor.post_process_panoptic(outputs, processed_sizes)[0]
-
-    panoptic_seg = Image.open(io.BytesIO(result["png_string"])).resize((image.width, image.height))
-    panoptic_seg = np.array(panoptic_seg, dtype=np.uint8)
-
-    class_str = '\n'.join(segmentation_cfg.id2label[s['category_id']] for s in result['segments_info'])
-
-    panoptic_seg_id = rgb_to_id(panoptic_seg)
-
-    if len(mask_indices) > 0:
-        mask = (panoptic_seg_id == mask_indices[0])
-        for idx in mask_indices[1:]:
-            mask = mask | (panoptic_seg_id == idx)
-        mask = clean_mask(mask, min_kernel=min_kernel, max_kernel=max_kernel)
-
-    masked_image = np.array(image).copy()
-    masked_image[mask] = 0
-
-    masked_image = Image.fromarray(masked_image).resize(image.size)
-    mask = Image.fromarray(mask.astype(np.uint8) * 255).resize(image.size)
-
-    if num_diffusion_steps == 0:
-        return masked_image, masked_image, class_str
-
-    STABLE_DIFFUSION_SMALL_EDGE = 512
-
-    assert masked_image.size == mask.size
-    w, h = masked_image.size
-    is_width_larger = w > h
-    resize_ratio = STABLE_DIFFUSION_SMALL_EDGE / (h if is_width_larger else w)
-
-    new_width = int(w * resize_ratio) if is_width_larger else STABLE_DIFFUSION_SMALL_EDGE
-    new_height = STABLE_DIFFUSION_SMALL_EDGE if is_width_larger else int(h * resize_ratio)
-
-    new_width += 8 - (new_width % 8) if is_width_larger else 0
-    new_height += 0 if is_width_larger else 8 - (new_height % 8)
-
-    mask = mask.convert("RGB").resize((new_width, new_height))
-    masked_image = masked_image.convert("RGB").resize((new_width, new_height))
-
-    inpainted_image = pipe(
-        height=new_height,
-        width=new_width,
-        prompt=prompt,
-        image=masked_image,
-        mask_image=mask,
-        num_inference_steps=num_diffusion_steps
-    ).images[0]
-
-    return masked_image, inpainted_image, class_str
-
-
-# iface_segmentation = gr.Interface(
-#     fn=fn_segmentation,
-#     inputs=[
-#         "text",
-#         "text",
-#         gr.Image(value="http://images.cocodataset.org/val2017/000000039769.jpg"),
-#         gr.Slider(minimum=1, maximum=99, value=23, step=2),
-#         gr.Slider(minimum=1, maximum=99, value=5, step=2),
-#         gr.Slider(minimum=0, maximum=100, value=50, step=1),
-#     ],
-#     outputs=["text", gr.Image(type="pil"), gr.Image(type="pil"), "number", "text"]
-# )
-
-# iface_diffusion = gr.Interface(
-#     fn=fn_diffusion,
-#     inputs=["text", gr.Image(type='pil'), gr.Image(type='pil'), "number", "text"],
-#     outputs=[gr.Image(), gr.Image(), gr.Textbox()]
-# )
-
-# iface = gr.Series(
-#     iface_segmentation, iface_diffusion,
-
-# iface = gr.Interface(
-#     fn=fn_segmentation_diffusion,
-#     inputs=[
-#         "text",
-#         "text",
-#         gr.Image(value="http://images.cocodataset.org/val2017/000000039769.jpg", type='pil'),
-#         gr.Slider(minimum=1, maximum=99, value=23, step=2),
-#         gr.Slider(minimum=1, maximum=99, value=5, step=2),
-#         gr.Slider(minimum=0, maximum=100, value=50, step=1),
-#     ],
-#     outputs=[gr.Image(), gr.Image(), gr.Textbox(interactive=False)]
-# )
-
-# iface = gr.Interface(
-#     fn=fn_segmentation,
-#     inputs=[
-#         gr.Image(value="http://images.cocodataset.org/val2017/000000039769.jpg", type='pil'),
-#         gr.Slider(minimum=1, maximum=99, value=23, step=2),
-#         gr.Slider(minimum=1, maximum=99, value=5, step=2),
-#     ],
-#     outputs=gr.Gallery()
-# )
-
-# iface.launch()
-
 demo = gr.Blocks()
 
 with demo:
-    input_image = gr.Image(value="http://images.cocodataset.org/val2017/000000039769.jpg", type='pil')
+    input_image = gr.Image(value="http://images.cocodataset.org/val2017/000000039769.jpg", type='pil', label="Input Image")
 
     bt_masks = gr.Button("Compute Masks")
 
     with gr.Row():
-        mask_image = gr.Image(type='numpy')
-        masked_image = gr.Image(type='pil')
+        mask_image = gr.Image(type='numpy', label="Diffusion Mask")
+        masked_image = gr.Image(type='pil', label="Masked Image")
         mask_storage = gr.State()
 
     with gr.Row():
-        max_slider = gr.Slider(minimum=1, maximum=99, value=23, step=2)
-        min_slider = gr.Slider(minimum=1, maximum=99, value=5, step=2)
+        max_slider = gr.Slider(minimum=1, maximum=99, value=23, step=2, label="Mask Overflow")
+        min_slider = gr.Slider(minimum=1, maximum=99, value=5, step=2, label="Mask Denoising")
 
-    mask_checkboxes = gr.CheckboxGroup(interactive=True)
+    mask_checkboxes = gr.CheckboxGroup(interactive=True, label="Mask Selection")
 
     with gr.Row():
        with gr.Column():
-            prompt = gr.Textbox("Two ginger cats lying together on a pink sofa. There are two TV remotes. High definition.")
-            steps_slider = gr.Slider(minimum=1, maximum=100, value=50)
+            prompt = gr.Textbox("Two ginger cats lying together on a pink sofa. There are two TV remotes. High definition.", label="Prompt")
+            negative_prompt = gr.Textbox(label="Negative Prompt")
+        with gr.Column():
+            steps_slider = gr.Slider(minimum=1, maximum=100, value=50, label="Inference Steps")
+            guidance_slider = gr.Slider(minimum=0.0, maximum=50.0, value=7.5, step=0.1, label="Guidance Scale")
            bt_diffusion = gr.Button("Run Diffusion")
 
-    inpainted_image = gr.Image(type='pil')
-
-    bt_masks.click(fn_segmentation, inputs=[input_image, max_slider, min_slider], outputs=[mask_storage, mask_checkboxes, mask_image, masked_image])
-
-    max_slider.change(fn_update_mask, inputs=[input_image, mask_storage, mask_checkboxes, max_slider, min_slider], outputs=[mask_image, masked_image])
-    min_slider.change(fn_update_mask, inputs=[input_image, mask_storage, mask_checkboxes, max_slider, min_slider], outputs=[mask_image, masked_image])
-    mask_checkboxes.change(fn_update_mask, inputs=[input_image, mask_storage, mask_checkboxes, max_slider, min_slider], outputs=[mask_image, masked_image])
-
-    bt_diffusion.click(fn_diffusion, inputs=[prompt, masked_image, mask_image, steps_slider], outputs=inpainted_image)
+    inpainted_image = gr.Image(type='pil', label="Inpainted Image")
+
+    update_mask_inputs = [input_image, mask_storage, mask_checkboxes, max_slider, min_slider]
+    update_mask_outputs = [mask_image, masked_image]
+
+    input_image.change(lambda: gr.CheckboxGroup.update(choices=[], value=[]), outputs=mask_checkboxes)
+
+    bt_masks.click(fn_segmentation, inputs=[input_image, max_slider, min_slider], outputs=[mask_storage, mask_checkboxes, mask_image, masked_image])
+
+    max_slider.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs)
+    min_slider.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs)
+    mask_checkboxes.change(fn_update_mask, inputs=update_mask_inputs, outputs=update_mask_outputs)
+
+    bt_diffusion.click(fn_diffusion, inputs=[
+        prompt,
+        masked_image,
+        mask_image,
+        steps_slider,
+        guidance_slider,
+        negative_prompt
+    ], outputs=inpainted_image)
 
 demo.launch()
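
A note on the sizing logic that fn_diffusion keeps (visible as context around the last hunk): the short edge is scaled to STABLE_DIFFUSION_SMALL_EDGE = 512 and the long edge is then padded up to a multiple of 8, since Stable Diffusion expects dimensions divisible by 8. A worked example of those rules, assuming a 640x480 input:

# Worked example of the resize rules in fn_diffusion for a 640x480 image.
STABLE_DIFFUSION_SMALL_EDGE = 512

w, h = 640, 480
is_width_larger = w > h                               # True
resize_ratio = STABLE_DIFFUSION_SMALL_EDGE / h        # 512 / 480 ~ 1.067

new_width = int(w * resize_ratio)                     # 682
new_height = STABLE_DIFFUSION_SMALL_EDGE              # 512
new_width += 8 - (new_width % 8)                      # 682 -> 688

assert new_width % 8 == 0 and new_height % 8 == 0     # final size: 688x512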
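
For reference, the three arguments newly wired through fn_diffusion map directly onto the diffusers inpainting call. A minimal sketch of the equivalent direct usage; the checkpoint id and file names are placeholders, not what the Space's load_diffusion_pipeline actually loads:

import torch
from PIL import Image
from diffusers import StableDiffusionInpaintPipeline

# Placeholder checkpoint; any StableDiffusionInpaintPipeline-compatible model works.
pipe = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting")
pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")

image = Image.open("masked.png").convert("RGB").resize((512, 512))
mask = Image.open("mask.png").convert("RGB").resize((512, 512))

inpainted = pipe(
    prompt="Two ginger cats lying together on a pink sofa.",
    image=image,
    mask_image=mask,
    num_inference_steps=50,   # steps_slider default in the UI
    guidance_scale=7.5,       # guidance_slider default in the UI
    negative_prompt=None,     # an empty Negative Prompt textbox is mapped to None
).images[0]
inpainted.save("inpainted.png")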