Spaces:

Arulkumar03
/

GroundingDINO_SOTA_Zero_Shot_Model

Running

App Files Files Community

Arulkumar03 committed on Oct 18, 2023

Commit

96b3f69

•

1 Parent(s): 3fefe22

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -8

app.py CHANGED Viewed

@@ -62,8 +62,37 @@ def image_transform_grounding_for_vis(init_image):
     image, _ = transform(init_image, None) # 3, h, w
     return image
-model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
 def run_grounding(input_image, grounding_caption, box_threshold, text_threshold):
     init_image = input_image.convert("RGB")
     original_size = init_image.size
@@ -72,12 +101,21 @@ def run_grounding(input_image, grounding_caption, box_threshold, text_threshold)
     image_pil: Image = image_transform_grounding_for_vis(init_image)
     # run grounidng
-    boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
-    annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
-    image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
-    return image_with_box
 if __name__ == "__main__":
@@ -124,7 +162,7 @@ if __name__ == "__main__":
         gr.Examples(
           [["watermelon.jpg", "watermelon", 0.25, 0.25]],
           inputs = [input_image, grounding_caption, box_threshold, text_threshold],
-          outputs = [gallery],
           fn=run_grounding,
           cache_examples=True,
           label='Try this example input!'

     image, _ = transform(init_image, None) # 3, h, w
     return image
+model = load_model_hf(task, config_file, ckpt_repo_id, ckpt_filenmae)  # built once at module level; `task` is the argument this commit adds; `ckpt_filenmae` keeps the spelling used by the removed line, so it is presumably the name defined elsewhere in the file — TODO confirm
+def segment(image, sam_model, boxes):
+  sam_model.set_image(image)
+  H, W, _ = image.shape
+  boxes_xyxy = box_ops.box_cxcywh_to_xyxy(boxes) * torch.Tensor([W, H, W, H])
+  transformed_boxes = sam_model.transform.apply_boxes_torch(boxes_xyxy.to(device), image.shape[:2])
+  masks, _, _ = sam_model.predict_torch(
+      point_coords = None,
+      point_labels = None,
+      boxes = transformed_boxes,
+      multimask_output = False,
+      )
+  return masks.cpu()
+def draw_mask(mask, image, random_color=True):
+    if random_color:
+        color = np.concatenate([np.random.random(3), np.array([0.8])], axis=0)
+    else:
+        color = np.array([30/255, 144/255, 255/255, 0.6])
+    h, w = mask.shape[-2:]
+    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
+    annotated_frame_pil = Image.fromarray(image).convert("RGBA")
+    mask_image_pil = Image.fromarray((mask_image.cpu().numpy() * 255).astype(np.uint8)).convert("RGBA")
+    return np.array(Image.alpha_composite(annotated_frame_pil, mask_image_pil))
 def run_grounding(input_image, grounding_caption, box_threshold, text_threshold):
     init_image = input_image.convert("RGB")
     original_size = init_image.size
     image_pil: Image = image_transform_grounding_for_vis(init_image)
     # run grounidng
+    if task=='predict':
+        boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
+        annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
+        image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
+        return image_with_box
+    elif task=='segment':
+        boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
+        segmented_frame_masks = segment(image_tensor, model, boxes=boxes)
+        annotated_frame_with_mask = draw_mask(segmented_frame_masks[0][0], annotated_frame)
+        seg_with_bbox=Image.fromarray(annotated_frame_with_mask)
+        return seg_with_bbox
 if __name__ == "__main__":
         gr.Examples(
           [["watermelon.jpg", "watermelon", 0.25, 0.25]],
           inputs = [input_image, grounding_caption, box_threshold, text_threshold],
+          outputs = [gallery],gr.Choice(["segment", "classify"], label="Select Task")],
           fn=run_grounding,
           cache_examples=True,
           label='Try this example input!'