Spaces:

hmdliu
/

sidewalks-seg

Sleeping

App Files Community

hmdliu committed on May 6

Commit

fadb2ab

•

1 Parent(s): 54b7544

Add mask prompt

Browse files

Files changed (2) hide show

app.py +54 -2
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -3,8 +3,10 @@ import numpy as np
 import gradio as gr
 import matplotlib.pyplot as plt
 from PIL import Image
 from transformers import SamModel, SamProcessor
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 processor = SamProcessor.from_pretrained('facebook/sam-vit-base')
 model = SamModel.from_pretrained('hmdliu/sidewalks-seg')
@@ -62,6 +64,34 @@ def segment_image_with_guidance(image, threshold, offset, x_min, y_min, x_max, y
     regions = [(guidance_mask, 'Guidance'), (pred_mask, 'Sidewalks')]
     return (image['background'], regions), Image.open('prob.png')
 with gr.Blocks() as demo:
     with gr.Tab('Baseline'):
         with gr.Row():
@@ -78,10 +108,10 @@ with gr.Blocks() as demo:
                 t1_pred = gr.AnnotatedImage(color_map={'Sidewalks': '#0000FF'}, label='Prediction')
             with gr.Column():
                 t1_prob_map = gr.Image(type='pil', label='Probability Map')
-    with gr.Tab('Mask Guidance'):
         with gr.Row():
             with gr.Column():
-                t2_input = gr.ImageEditor(type='pil', crop_size='2:1', label='Input Image',
                                           brush=gr.Brush(default_size='5', color_mode='fixed'),
                                           sources=['upload'], transforms=[])
                 with gr.Row():
@@ -96,6 +126,23 @@ with gr.Blocks() as demo:
                 t2_pred = gr.AnnotatedImage(color_map={'Guidance': '#FF0000', 'Sidewalks': '#0000FF'}, label='Prediction')
             with gr.Column():
                 t2_prob_map = gr.Image(type='pil', label='Probability Map')
     t1_segment.click(
         segment_image,
         inputs=[t1_input, t1_slider, t1_x_min, t1_y_min, t1_x_max, t1_y_max],
@@ -106,4 +153,9 @@ with gr.Blocks() as demo:
         inputs=[t2_input, t2_thresh, t2_offset, t2_x_min, t2_y_min, t2_x_max, t2_y_max],
         outputs=[t2_pred, t2_prob_map]
     )
 demo.launch(debug=True, show_error=True)

 import gradio as gr
 import matplotlib.pyplot as plt
 from PIL import Image
+from torchvision.transforms import ToTensor
 from transformers import SamModel, SamProcessor
+to_tensor = ToTensor()
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 processor = SamProcessor.from_pretrained('facebook/sam-vit-base')
 model = SamModel.from_pretrained('hmdliu/sidewalks-seg')
     regions = [(guidance_mask, 'Guidance'), (pred_mask, 'Sidewalks')]
     return (image['background'], regions), Image.open('prob.png')
+def segment_image_with_prompt(image, threshold, x_min, y_min, x_max, y_max):
+    # tolerate TIFF image input
+    image['background'].save('image.png')
+    # init input data
+    img = Image.open('image.png').convert('RGB')
+    mask = (np.max(np.array(image['layers'][0]), axis=2) != 0)
+    mask_prompt = to_tensor(mask).float()
+    box_prompt = [[[x_min, y_min, x_max, y_max]]]
+    inputs = processor(img, input_boxes=box_prompt,
+                       input_masks=mask_prompt, return_tensors='pt')
+    # make prediction
+    outputs = model(pixel_values=inputs['pixel_values'].to(device),
+                    input_boxes=inputs['input_boxes'].to(device),
+                    input_masks=mask_prompt.to(device),
+                    multimask_output=False)
+    prob_map = torch.sigmoid(outputs.pred_masks.squeeze()).cpu().detach()
+    pred_mask = (prob_map > threshold).float().numpy()
+    # visualize results
+    plt.figure(figsize=(8, 8))
+    plt.imshow(prob_map.numpy(), cmap='jet', interpolation='nearest')
+    plt.axis('off')
+    plt.tight_layout()
+    plt.savefig('prob.png', bbox_inches='tight', pad_inches=0)
+    plt.close()
+    # post-processing
+    regions = [(mask, 'Prompt'), (pred_mask, 'Sidewalks')]
+    return (image['background'], regions), Image.open('prob.png')
 with gr.Blocks() as demo:
     with gr.Tab('Baseline'):
         with gr.Row():
                 t1_pred = gr.AnnotatedImage(color_map={'Sidewalks': '#0000FF'}, label='Prediction')
             with gr.Column():
                 t1_prob_map = gr.Image(type='pil', label='Probability Map')
+    with gr.Tab('Mask Guidance (Best)'):
         with gr.Row():
             with gr.Column():
+                t2_input = gr.ImageEditor(type='pil', crop_size='1:1', label='Input Image',
                                           brush=gr.Brush(default_size='5', color_mode='fixed'),
                                           sources=['upload'], transforms=[])
                 with gr.Row():
                 t2_pred = gr.AnnotatedImage(color_map={'Guidance': '#FF0000', 'Sidewalks': '#0000FF'}, label='Prediction')
             with gr.Column():
                 t2_prob_map = gr.Image(type='pil', label='Probability Map')
+    with gr.Tab('Mask Prompt'):
+        with gr.Row():
+            with gr.Column():
+                t3_input = gr.ImageEditor(type='pil', crop_size='1:1', label='Input Image',
+                                          brush=gr.Brush(default_size='5', color_mode='fixed'),
+                                          sources=['upload'], transforms=[])
+                with gr.Row():
+                    t3_x_min = gr.Textbox(value=0, label='x_min')
+                    t3_y_min = gr.Textbox(value=0, label='y_min')
+                    t3_x_max = gr.Textbox(value=256, label='x_max')
+                    t3_y_max = gr.Textbox(value=256, label='y_max')
+                t3_thresh = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label='Prediction Threshold')
+                t3_segment = gr.Button('Segment')
+            with gr.Column():
+                t3_pred = gr.AnnotatedImage(color_map={'Prompt': '#FF0000', 'Sidewalks': '#0000FF'}, label='Prediction')
+            with gr.Column():
+                t3_prob_map = gr.Image(type='pil', label='Probability Map')
     t1_segment.click(
         segment_image,
         inputs=[t1_input, t1_slider, t1_x_min, t1_y_min, t1_x_max, t1_y_max],
         inputs=[t2_input, t2_thresh, t2_offset, t2_x_min, t2_y_min, t2_x_max, t2_y_max],
         outputs=[t2_pred, t2_prob_map]
     )
+    t3_segment.click(
+        segment_image_with_prompt,
+        inputs=[t3_input, t3_thresh, t3_x_min, t3_y_min, t3_x_max, t3_y_max],
+        outputs=[t3_pred, t3_prob_map]
+    )
 demo.launch(debug=True, show_error=True)

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 torch
 matplotlib
 transformers

 torch
+torchvision
 matplotlib
 transformers