Spaces:

chongzhou
/

EdgeSAM

Runtime error

App Files Files Community

chongzhou committed on Dec 14, 2023

Commit

435ddbc

•

1 Parent(s): 4c7dc89

Save image embeddings in the Gradio session state to avoid re-encoding the image on every prompt

Browse files

Files changed (3) hide show

app.py +10 -8
segment_anything/onnx/predictor_onnx.py +7 -2
segment_anything/predictor.py +14 -6

app.py CHANGED Viewed

@@ -107,6 +107,7 @@ def reset(session_state):
     session_state['box_list'] = []
     session_state['ori_image'] = None
     session_state['image_with_prompt'] = None
     return None, session_state
@@ -116,6 +117,7 @@ def reset_all(session_state):
     session_state['box_list'] = []
     session_state['ori_image'] = None
     session_state['image_with_prompt'] = None
     return None, None, session_state
@@ -145,8 +147,8 @@ def on_image_upload(
     session_state['ori_image'] = copy.deepcopy(image)
     session_state['image_with_prompt'] = copy.deepcopy(image)
     print("Image changed")
-    # nd_image = np.array(global_image)
-    # predictor.set_image(nd_image)
     return image, session_state
@@ -188,13 +190,11 @@ def segment_with_points(
     )
     image = session_state['image_with_prompt']
-    nd_image = np.array(session_state['ori_image'])
-    predictor.set_image(nd_image)
     if ENABLE_ONNX:
         coord_np = np.array(session_state['coord_list'])[None]
         label_np = np.array(session_state['label_list'])[None]
         masks, scores, _ = predictor.predict(
             point_coords=coord_np,
             point_labels=label_np,
         )
@@ -204,6 +204,7 @@ def segment_with_points(
         coord_np = np.array(session_state['coord_list'])
         label_np = np.array(session_state['label_list'])
         masks, scores, logits = predictor.predict(
             point_coords=coord_np,
             point_labels=label_np,
             num_multimask_outputs=4,
@@ -271,18 +272,18 @@ def segment_with_box(
         )
         box_np = np.array(box)
-        nd_image = np.array(session_state['ori_image'])
-        predictor.set_image(nd_image)
         if ENABLE_ONNX:
             point_coords = box_np.reshape(2, 2)[None]
             point_labels = np.array([2, 3])[None]
             masks, _, _ = predictor.predict(
                 point_coords=point_coords,
                 point_labels=point_labels,
             )
             annotations = masks[:, 0, :, :]
         else:
             masks, scores, _ = predictor.predict(
                 box=box_np,
                 num_multimask_outputs=1,
             )
@@ -312,7 +313,8 @@ with gr.Blocks(css=css, title="EdgeSAM") as demo:
         'label_list': [],
         'box_list': [],
         'ori_image': None,
-        'image_with_prompt': None
     })
     with gr.Row():

     session_state['box_list'] = []
     session_state['ori_image'] = None
     session_state['image_with_prompt'] = None
+    session_state['feature'] = None
     return None, session_state
     session_state['box_list'] = []
     session_state['ori_image'] = None
     session_state['image_with_prompt'] = None
+    session_state['feature'] = None
     return None, None, session_state
     session_state['ori_image'] = copy.deepcopy(image)
     session_state['image_with_prompt'] = copy.deepcopy(image)
     print("Image changed")
+    nd_image = np.array(image)
+    session_state['feature'] = predictor.set_image(nd_image)
     return image, session_state
     )
     image = session_state['image_with_prompt']
     if ENABLE_ONNX:
         coord_np = np.array(session_state['coord_list'])[None]
         label_np = np.array(session_state['label_list'])[None]
         masks, scores, _ = predictor.predict(
+            features=session_state['feature'],
             point_coords=coord_np,
             point_labels=label_np,
         )
         coord_np = np.array(session_state['coord_list'])
         label_np = np.array(session_state['label_list'])
         masks, scores, logits = predictor.predict(
+            features=session_state['feature'],
             point_coords=coord_np,
             point_labels=label_np,
             num_multimask_outputs=4,
         )
         box_np = np.array(box)
         if ENABLE_ONNX:
             point_coords = box_np.reshape(2, 2)[None]
             point_labels = np.array([2, 3])[None]
             masks, _, _ = predictor.predict(
+                features=session_state['feature'],
                 point_coords=point_coords,
                 point_labels=point_labels,
             )
             annotations = masks[:, 0, :, :]
         else:
             masks, scores, _ = predictor.predict(
+                features=session_state['feature'],
                 box=box_np,
                 num_multimask_outputs=1,
             )
         'label_list': [],
         'box_list': [],
         'ori_image': None,
+        'image_with_prompt': None,
+        'feature': None
     })
     with gr.Row():

segment_anything/onnx/predictor_onnx.py CHANGED Viewed

@@ -60,17 +60,22 @@ class SamPredictorONNX:
         self.features = outputs[0]
         self.is_image_set = True
     def predict(
             self,
             point_coords: Optional[np.ndarray] = None,
             point_labels: Optional[np.ndarray] = None,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-        if not self.is_image_set:
             raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
         point_coords = self.transform.apply_coords(point_coords, self.original_size)
         outputs = self.decoder.run(None, {
-            'image_embeddings': self.features,
             'point_coords': point_coords.astype(np.float32),
             'point_labels': point_labels.astype(np.float32)
         })

         self.features = outputs[0]
         self.is_image_set = True
+        return self.features
     def predict(
             self,
+            features: np.ndarray = None,
             point_coords: Optional[np.ndarray] = None,
             point_labels: Optional[np.ndarray] = None,
     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        if features is None and not self.is_image_set:
             raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
+        if features is None:
+            features = self.features
         point_coords = self.transform.apply_coords(point_coords, self.original_size)
         outputs = self.decoder.run(None, {
+            'image_embeddings': features,
             'point_coords': point_coords.astype(np.float32),
             'point_labels': point_labels.astype(np.float32)
         })

segment_anything/predictor.py CHANGED Viewed

@@ -37,7 +37,7 @@ class SamPredictor:
         self,
         image: np.ndarray,
         image_format: str = "RGB",
-    ) -> None:
         """
         Calculates the image embeddings for the provided image, allowing
         masks to be predicted with the 'predict' method.
@@ -59,14 +59,14 @@ class SamPredictor:
         input_image_torch = torch.as_tensor(input_image, device=self.device)
         input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
-        self.set_torch_image(input_image_torch, image.shape[:2])
     @torch.no_grad()
     def set_torch_image(
         self,
         transformed_image: torch.Tensor,
         original_image_size: Tuple[int, ...],
-    ) -> None:
         """
         Calculates the image embeddings for the provided image, allowing
         masks to be predicted with the 'predict' method. Expects the input
@@ -91,8 +91,11 @@ class SamPredictor:
         self.features = self.model.image_encoder(input_image)
         self.is_image_set = True
     def predict(
         self,
         point_coords: Optional[np.ndarray] = None,
         point_labels: Optional[np.ndarray] = None,
         box: Optional[np.ndarray] = None,
@@ -131,9 +134,12 @@ class SamPredictor:
             of masks and H=W=256. These low resolution logits can be passed to
             a subsequent iteration as mask input.
         """
-        if not self.is_image_set:
             raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
         # Transform input prompts
         coords_torch, labels_torch, box_torch, mask_input_torch = None, None, None, None
         if point_coords is not None:
@@ -153,6 +159,7 @@ class SamPredictor:
             mask_input_torch = mask_input_torch[None, :, :, :]
         masks, iou_predictions, low_res_masks = self.predict_torch(
             coords_torch,
             labels_torch,
             box_torch,
@@ -170,6 +177,7 @@ class SamPredictor:
     @torch.no_grad()
     def predict_torch(
         self,
         point_coords: Optional[torch.Tensor],
         point_labels: Optional[torch.Tensor],
         boxes: Optional[torch.Tensor] = None,
@@ -211,7 +219,7 @@ class SamPredictor:
             of masks and H=W=256. These low res logits can be passed to
             a subsequent iteration as mask input.
         """
-        if not self.is_image_set:
             raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
         if point_coords is not None:
@@ -228,7 +236,7 @@ class SamPredictor:
         # Predict masks
         low_res_masks, iou_predictions = self.model.mask_decoder(
-            image_embeddings=self.features,
             image_pe=self.model.prompt_encoder.get_dense_pe(),
             sparse_prompt_embeddings=sparse_embeddings,
             dense_prompt_embeddings=dense_embeddings,

         self,
         image: np.ndarray,
         image_format: str = "RGB",
+    ) -> torch.Tensor:
         """
         Calculates the image embeddings for the provided image, allowing
         masks to be predicted with the 'predict' method.
         input_image_torch = torch.as_tensor(input_image, device=self.device)
         input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
+        return self.set_torch_image(input_image_torch, image.shape[:2])
     @torch.no_grad()
     def set_torch_image(
         self,
         transformed_image: torch.Tensor,
         original_image_size: Tuple[int, ...],
+    ) -> torch.Tensor:
         """
         Calculates the image embeddings for the provided image, allowing
         masks to be predicted with the 'predict' method. Expects the input
         self.features = self.model.image_encoder(input_image)
         self.is_image_set = True
+        return self.features
     def predict(
         self,
+        features: torch.Tensor = None,
         point_coords: Optional[np.ndarray] = None,
         point_labels: Optional[np.ndarray] = None,
         box: Optional[np.ndarray] = None,
             of masks and H=W=256. These low resolution logits can be passed to
             a subsequent iteration as mask input.
         """
+        if features is None and not self.is_image_set:
             raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
+        if features is None:
+            features = self.features
         # Transform input prompts
         coords_torch, labels_torch, box_torch, mask_input_torch = None, None, None, None
         if point_coords is not None:
             mask_input_torch = mask_input_torch[None, :, :, :]
         masks, iou_predictions, low_res_masks = self.predict_torch(
+            features,
             coords_torch,
             labels_torch,
             box_torch,
     @torch.no_grad()
     def predict_torch(
         self,
+        features: torch.Tensor,
         point_coords: Optional[torch.Tensor],
         point_labels: Optional[torch.Tensor],
         boxes: Optional[torch.Tensor] = None,
             of masks and H=W=256. These low res logits can be passed to
             a subsequent iteration as mask input.
         """
+        if features is None and not self.is_image_set:
             raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
         if point_coords is not None:
         # Predict masks
         low_res_masks, iou_predictions = self.model.mask_decoder(
+            image_embeddings=features,
             image_pe=self.model.prompt_encoder.get_dense_pe(),
             sparse_prompt_embeddings=sparse_embeddings,
             dense_prompt_embeddings=dense_embeddings,