Spaces:

Arulkumar03
/

GroundingDINO_SOTA_Zero_Shot_Model

Running

App Files Files Community

Arulkumar03 committed on Oct 18, 2023

Commit

4b21d34

•

1 Parent(s): c25e3bd

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -59

app.py CHANGED Viewed

@@ -23,7 +23,7 @@ from groundingdino.util.slconfig import SLConfig
 from groundingdino.util.utils import clean_state_dict
 from groundingdino.util.inference import annotate, load_image, predict
 import groundingdino.datasets.transforms as T
-from groundingdino.util import box_ops
 from huggingface_hub import hf_hub_download
@@ -64,62 +64,23 @@ def image_transform_grounding_for_vis(init_image):
 model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
-def segment(image, sam_model, boxes):
-  H, W, _ = image.shape
-  boxes_xyxy = box_ops.box_cxcywh_to_xyxy(boxes) * torch.Tensor([W, H, W, H])
-  transformed_boxes = sam_model.transform.apply_boxes_torch(boxes_xyxy.to(device), image.shape[:2])
-  masks, _, _ = sam_model.predict_torch(
-      point_coords = None,
-      point_labels = None,
-      boxes = transformed_boxes,
-      multimask_output = False,
-      )
-  return masks.cpu()
-def draw_mask(mask, image, random_color=True):
-    if random_color:
-        color = np.concatenate([np.random.random(3), np.array([0.8])], axis=0)
-    else:
-        color = np.array([30/255, 144/255, 255/255, 0.6])
-    h, w = mask.shape[-2:]
-    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
-    annotated_frame_pil = Image.fromarray(image).convert("RGBA")
-    mask_image_pil = Image.fromarray((mask_image.cpu().numpy() * 255).astype(np.uint8)).convert("RGBA")
-    return np.array(Image.alpha_composite(annotated_frame_pil, mask_image_pil))
-def run_grounding(input_image,choice, grounding_caption, box_threshold, text_threshold,do_segmentation):
     init_image = input_image.convert("RGB")
     original_size = init_image.size
     _, image_tensor = image_transform_grounding(init_image)
     image_pil: Image = image_transform_grounding_for_vis(init_image)
-    if choice == 'segment':
-        boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
-        segmented_frame_masks = segment(image_tensor, model, boxes=boxes)
-        annotated_frame_with_mask = draw_mask(segmented_frame_masks[0][0], annotated_frame)
-    else:
-        # run grounding
-        boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
-        annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
     image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
-    return image_with_box
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True)
-    parser.add_argument("--debug", action="store_true", help="using debug mode")
-    parser.add_argument("--share", action="store_true", help="share the app")
-    args = parser.parse_args()
     css = """
   #mkd {
     height: 500px;
@@ -133,13 +94,9 @@ if __name__ == "__main__":
         gr.Markdown("<h3><center>Open-World Detection with <a href='https://github.com/Arulkumar03/SOTA-Grounding-DINO.ipynb'>Grounding DINO</a><h3><center>")
         gr.Markdown("<h3><center>Note the model runs on CPU, so it may take a while to run the model.<h3><center>")
         with gr.Row():
             with gr.Column():
                 input_image = gr.Image(source='upload', type="pil")
-                choice = gr.Radio(
-                    ["segment", "classify"], default="segment", label="Choose Operation"
-                )
                 grounding_caption = gr.Textbox(label="Detection Prompt")
                 run_button = gr.Button(label="Run")
                 with gr.Accordion("Advanced options", open=False):
@@ -155,15 +112,18 @@ if __name__ == "__main__":
                     type="pil",
                     # label="grounding results"
                 ).style(full_width=True, full_height=True)
         run_button.click(fn=run_grounding, inputs=[
-                        input_image, choice, grounding_caption, box_threshold, text_threshold], outputs=[gallery])
         gr.Examples(
-            [["watermelon.jpg", "segment", "watermelon", 0.25, 0.25]],
-            inputs=[input_image, choice, grounding_caption, box_threshold, text_threshold],
-            outputs=[gallery],
-            fn=run_grounding,
-            cache_examples=True,
-            label='Try this example input!'
-        )
-    block.launch(share=False, show_api=False, show_error=True)

 from groundingdino.util.utils import clean_state_dict
 from groundingdino.util.inference import annotate, load_image, predict
 import groundingdino.datasets.transforms as T
 from huggingface_hub import hf_hub_download
 model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
+def run_grounding(input_image, grounding_caption, box_threshold, text_threshold):
     init_image = input_image.convert("RGB")
     original_size = init_image.size
     _, image_tensor = image_transform_grounding(init_image)
     image_pil: Image = image_transform_grounding_for_vis(init_image)
+    # run grounding
+    boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
+    annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
     image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
+    return image_with_box
 if __name__ == "__main__":
     css = """
   #mkd {
     height: 500px;
         gr.Markdown("<h3><center>Open-World Detection with <a href='https://github.com/Arulkumar03/SOTA-Grounding-DINO.ipynb'>Grounding DINO</a><h3><center>")
         gr.Markdown("<h3><center>Note the model runs on CPU, so it may take a while to run the model.<h3><center>")
         with gr.Row():
             with gr.Column():
                 input_image = gr.Image(source='upload', type="pil")
                 grounding_caption = gr.Textbox(label="Detection Prompt")
                 run_button = gr.Button(label="Run")
                 with gr.Accordion("Advanced options", open=False):
                     type="pil",
                     # label="grounding results"
                 ).style(full_width=True, full_height=True)
+                # gallery = gr.Gallery(label="Generated images", show_label=False).style(
+                #         grid=[1], height="auto", container=True, full_width=True, full_height=True)
         run_button.click(fn=run_grounding, inputs=[
+                        input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery])
         gr.Examples(
+          [["watermelon.jpg", "watermelon", 0.25, 0.25]],
+          inputs = [input_image, grounding_caption, box_threshold, text_threshold],
+          outputs = [gallery],
+          fn=run_grounding,
+          cache_examples=True,
+          label='Try this example input!'
+      )
+    block.launch(share=True, show_api=False, show_error=True)