Grounding_DINO_demo

Running

ShilongLiu committed on Mar 28, 2023

Commit

af720a1

•

1 Parent(s): 27486e3

cpu only

Files changed (2) hide show

app.py CHANGED Viewed

@@ -34,10 +34,10 @@ ckpt_repo_id = "ShilongLiu/GroundingDINO"
 ckpt_filenmae = "groundingdino_swint_ogc.pth"
-def load_model_hf(model_config_path, repo_id, filename):
     args = SLConfig.fromfile(model_config_path)
-    args.device = 'cuda'
     model = build_model(args)
     cache_file = hf_hub_download(repo_id=repo_id, filename=filename)
     checkpoint = torch.load(cache_file, map_location='cpu')
@@ -72,7 +72,7 @@ def run_grounding(input_image, grounding_caption, box_threshold, text_threshold)
     image_pil: Image = image_transform_grounding_for_vis(init_image)
     # run grounidng
-    boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold)
     annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
     image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))

 ckpt_filenmae = "groundingdino_swint_ogc.pth"
+def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
     args = SLConfig.fromfile(model_config_path)
     model = build_model(args)
+    args.device = device
     cache_file = hf_hub_download(repo_id=repo_id, filename=filename)
     checkpoint = torch.load(cache_file, map_location='cpu')
     image_pil: Image = image_transform_grounding_for_vis(init_image)
     # run grounidng
+    boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
     annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
     image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))

groundingdino/util/inference.py CHANGED Viewed

@@ -21,9 +21,9 @@ def preprocess_caption(caption: str) -> str:
     return result + "."
-def load_model(model_config_path: str, model_checkpoint_path: str):
     args = SLConfig.fromfile(model_config_path)
-    args.device = "cuda"
     model = build_model(args)
     checkpoint = torch.load(model_checkpoint_path, map_location="cpu")
     model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False)
@@ -50,12 +50,13 @@ def predict(
         image: torch.Tensor,
         caption: str,
         box_threshold: float,
-        text_threshold: float
 ) -> Tuple[torch.Tensor, torch.Tensor, List[str]]:
     caption = preprocess_caption(caption=caption)
-    model = model.cuda()
-    image = image.cuda()
     with torch.no_grad():
         outputs = model(image[None], captions=[caption])

     return result + "."
+def load_model(model_config_path: str, model_checkpoint_path: str, device='cuda'):
     args = SLConfig.fromfile(model_config_path)
+    args.device = device
     model = build_model(args)
     checkpoint = torch.load(model_checkpoint_path, map_location="cpu")
     model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False)
         image: torch.Tensor,
         caption: str,
         box_threshold: float,
+        text_threshold: float,
+        device='cuda',
 ) -> Tuple[torch.Tensor, torch.Tensor, List[str]]:
     caption = preprocess_caption(caption=caption)
+    model = model.to(device)
+    image = image.to(device)
     with torch.no_grad():
         outputs = model(image[None], captions=[caption])