majinyu committed
Commit b683f14
1 Parent(s): 0da4c5f

print info to track progress

Files changed (1): app.py (+7 -4)
app.py CHANGED
@@ -125,6 +125,7 @@ def draw_box(box, draw, label):
 
 
 def inference(raw_image, specified_tags, tagging_model_type, tagging_model, grounding_dino_model, sam_model):
+    print(f"Start processing, image size {raw_image.size}")
     raw_image = raw_image.convert("RGB")
 
     # run tagging model
@@ -165,6 +166,7 @@ def inference(raw_image, specified_tags, tagging_model_type, tagging_model, grou
     boxes_filt, scores, pred_phrases = get_grounding_output(
         grounding_dino_model, image, tags, box_threshold, text_threshold, device=device
     )
+    print("GroundingDINO finished")
 
     # run SAM
     image = np.asarray(raw_image)
@@ -179,13 +181,13 @@ def inference(raw_image, specified_tags, tagging_model_type, tagging_model, grou
 
     boxes_filt = boxes_filt.cpu()
     # use NMS to handle overlapped boxes
-    nms_idx = torchvision.ops.nms(
-        boxes_filt, scores, iou_threshold).numpy().tolist()
+    print(f"Before NMS: {boxes_filt.shape[0]} boxes")
+    nms_idx = torchvision.ops.nms(boxes_filt, scores, iou_threshold).numpy().tolist()
     boxes_filt = boxes_filt[nms_idx]
     pred_phrases = [pred_phrases[idx] for idx in nms_idx]
+    print(f"After NMS: {boxes_filt.shape[0]} boxes")
 
-    transformed_boxes = sam_model.transform.apply_boxes_torch(
-        boxes_filt, image.shape[:2]).to(device)
+    transformed_boxes = sam_model.transform.apply_boxes_torch(boxes_filt, image.shape[:2]).to(device)
 
     masks, _, _ = sam_model.predict_torch(
         point_coords=None,
@@ -193,6 +195,7 @@ def inference(raw_image, specified_tags, tagging_model_type, tagging_model, grou
         boxes=transformed_boxes.to(device),
         multimask_output=False,
     )
+    print("SAM finished")
 
     # draw output image
     mask_image = Image.new('RGBA', size, color=(0, 0, 0, 0))
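
For reference, a minimal standalone sketch (not part of app.py or this commit) of what the reformatted torchvision.ops.nms call does; the boxes, scores, and iou_threshold values below are invented for illustration, and the prints mirror the new before/after progress messages:

import torch
import torchvision

# Hypothetical boxes in (x1, y1, x2, y2) format: two near-duplicates plus one separate box.
boxes = torch.tensor([
    [10.0, 10.0, 100.0, 100.0],
    [12.0, 12.0, 102.0, 102.0],    # overlaps the first box almost completely
    [200.0, 200.0, 300.0, 300.0],
])
scores = torch.tensor([0.9, 0.8, 0.7])
iou_threshold = 0.5  # same role as the threshold used in app.py

print(f"Before NMS: {boxes.shape[0]} boxes")   # Before NMS: 3 boxes
keep = torchvision.ops.nms(boxes, scores, iou_threshold).numpy().tolist()
boxes = boxes[keep]
print(f"After NMS: {boxes.shape[0]} boxes")    # After NMS: 2 boxes

torchvision.ops.nms returns the indices of the boxes to keep, sorted by descending score, so the two prints report how many overlapping boxes were dropped.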