yuxindu
/

segvol

@@ -6,11 +6,9 @@ class SegVolConfig(PretrainedConfig):
     def __init__(
         self,
         test_mode=True,
-        test_w_zoom=False,
         **kwargs,
     ):
         self.spatial_size = [32, 256, 256]
         self.patch_size = [4, 16, 16]
         self.test_mode = test_mode
-        self.test_w_zoom = test_w_zoom
         super().__init__(**kwargs)

     def __init__(self, test_mode=True, **kwargs):
         """Initialise the configuration object.

         Args:
             test_mode: Value stored unchanged on the instance as
                 ``test_mode``. Defaults to ``True``.
             **kwargs: Extra keyword arguments, passed through untouched to
                 the parent class initializer.
         """
         # Both sizes are fixed three-element lists; they are not exposed as
         # constructor arguments.
         volume_size = [32, 256, 256]
         patch_dims = [4, 16, 16]

         # Attributes are set before delegating to the parent initializer,
         # in the same order as before.
         self.spatial_size = volume_size
         self.patch_size = patch_dims
         self.test_mode = test_mode
         super().__init__(**kwargs)

model_segvol_single.py CHANGED Viewed

@@ -26,8 +26,16 @@ class SegVolModel(PreTrainedModel):
         self.processor = SegVolProcessor(spatial_size=self.config.spatial_size)
-    def forward(self, image, text=None, boxes=None, points=None, **kwargs):
-        return self.model.forward(image, text=text, boxes=boxes, points=points, **kwargs)
 # processor
 class SegVolProcessor():
@@ -89,15 +97,15 @@ class SegVolProcessor():
         item['zoom_out_label'] = item_zoom_out['label']
         return item
-    def point_prompt(self, label_single_resize):
         point, point_label = select_points(label_single_resize, num_positive_extra=3, num_negative_extra=3)
         points_single = (point.unsqueeze(0).float().cuda(), point_label.unsqueeze(0).float().cuda())
-        binary_points_resize = build_binary_points(point, point_label, label_single_resize.shape)
         return points_single, binary_points_resize
-    def bbox_prompt(self, label_single_resize):
         box_single = generate_box(label_single_resize).unsqueeze(0).float().cuda()
-        binary_cube_resize = build_binary_cube(box_single, binary_cube_shape=label_single_resize.shape)
         return box_single, binary_cube_resize
 class MinMaxNormalization(transforms.Transform):
@@ -461,6 +469,7 @@ class TextEncoder(nn.Module):
         if text is None:
             return None
         if type(text) is str:
             text = [text]
         tokens = self.organ2tokens(text)
         clip_outputs = self.clip_text_model(**tokens)

         self.processor = SegVolProcessor(spatial_size=self.config.spatial_size)
+    def forward(self, image, zoomed_image=None, text_prompt=None, bbox_prompt=None, point_prompt=None, **kwargs):
+        print(image.shape, zoomed_image.shape, text_prompt)
+        print(bbox_prompt[0].shape, bbox_prompt[1].shape, point_prompt[0].shape, point_prompt[1].shape)
+        # test mode
+        if self.config.test_mode:
+            return
+        else:
+            print('unsupport training mode now')
+            return
+        return self.model.forward(image, text=None, boxes=None, points=None, **kwargs)
 # processor
 class SegVolProcessor():
         item['zoom_out_label'] = item_zoom_out['label']
         return item
+    def point_prompt_b(self, label_single_resize):
         point, point_label = select_points(label_single_resize, num_positive_extra=3, num_negative_extra=3)
         points_single = (point.unsqueeze(0).float().cuda(), point_label.unsqueeze(0).float().cuda())
+        binary_points_resize = build_binary_points(point, point_label, label_single_resize.shape).unsqueeze(0).unsqueeze(0)
         return points_single, binary_points_resize
+    def bbox_prompt_b(self, label_single_resize):
         box_single = generate_box(label_single_resize).unsqueeze(0).float().cuda()
+        binary_cube_resize = build_binary_cube(box_single, binary_cube_shape=label_single_resize.shape).unsqueeze(0).unsqueeze(0)
         return box_single, binary_cube_resize
 class MinMaxNormalization(transforms.Transform):
         if text is None:
             return None
         if type(text) is str:
+            # text is supposed to be list
             text = [text]
         tokens = self.organ2tokens(text)
         clip_outputs = self.clip_text_model(**tokens)