xuan2k committed
Commit cde08ad
Parent: 8511bb4

update thesis demo with SAM

Files changed (2)
  1. gradio_test.py +69 -0
  2. test.py +92 -207
gradio_test.py ADDED
@@ -0,0 +1,69 @@
+ import gradio as gr
+ import cv2
+ import numpy as np
+ from PIL import Image
+
+ block = gr.Blocks(
+     title="SAM and others",
+     # theme="shivi/calm_seafoam@>=0.0.1,<1.0.0",
+ )
+ colors = [(255, 0, 0), (0, 255, 0)]
+ markers = [1, 5]
+
+ def get_point(img, sel_pix, evt: gr.SelectData):
+     img = np.array(img, dtype=np.uint8)
+     sel_pix.append(evt.index)
+     # draw points
+
+     print(sel_pix)
+     for point in sel_pix:
+         cv2.drawMarker(img, point, colors[0], markerType=markers[0], markerSize=6, thickness=2)
+     return Image.fromarray(img).convert("RGB")
+
+
+
+ def undo_button(orig_img, sel_pix):
+     temp = orig_img.copy()
+     temp = np.array(temp, dtype=np.uint8)
+     if len(sel_pix) != 0:
+         sel_pix.pop()
+     for point in sel_pix:
+         cv2.drawMarker(temp, point, colors[0], markerType=markers[0], markerSize=6, thickness=2)
+     return Image.fromarray(temp).convert("RGB")
+
+ def toggle_button(orig_img, mode):
+     print(mode)
+     if mode:
+         ret = gr.Image(value= orig_img,elem_id="image_upload", type='pil', label="Upload", height=512, tool = "editor")# tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
+     else:
+         ret = gr.Image(value = orig_img, elem_id="image_upload", type='pil', label="Upload", height=512, tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
+     mode = not mode
+     return ret, mode
+
+ def store_img(img):
+     print("call for store")
+     return img, [] # when new image is uploaded, `selected_points` should be empty
+
+ with block:
+     selected_points = gr.State([])
+     original_image = gr.State()
+     mode = gr.State(True)
+     input_image = gr.Image(elem_id="image_upload", type='pil', label="Upload", height=512,)# tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
+     undo = gr.Button("undo mode", visible=True)
+     toggle = gr.Button("toggle mode", visible=True)
+     input_image.upload(
+         store_img,
+         [input_image],
+         [original_image, selected_points]
+     )
+
+     input_image.select(
+         get_point,
+         [input_image, selected_points],
+         [input_image]
+     )
+
+     undo.click(fn=undo_button, inputs=[original_image, selected_points], outputs=[input_image])
+     toggle.click(fn=toggle_button, inputs=[original_image, mode], outputs=[input_image, mode])
+
+ block.launch()
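The added script only collects and draws click points; it never calls SAM itself. For orientation, here is a rough sketch (not part of this commit) of how those selected_points can drive SAM's point-prompt API, assuming the official segment_anything package, the sam_vit_h_4b8939.pth checkpoint that test.py downloads, and a placeholder image path:

import numpy as np
from PIL import Image
from segment_anything import sam_model_registry, SamPredictor

# Load the ViT-H checkpoint (same file name test.py fetches) and wrap it in a predictor.
sam = sam_model_registry["vit_h"](checkpoint="./sam_vit_h_4b8939.pth")
predictor = SamPredictor(sam)

# Hypothetical inputs: an RGB image and the (x, y) clicks gathered by get_point.
image_rgb = np.array(Image.open("example.jpg").convert("RGB"))
selected_points = [(200, 150)]

predictor.set_image(image_rgb)
masks, scores, _ = predictor.predict(
    point_coords=np.array(selected_points),
    point_labels=np.ones(len(selected_points), dtype=np.int64),  # 1 = foreground click
    multimask_output=False,
)
# masks: boolean array of shape (1, H, W), ready to overlay on the image.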
test.py CHANGED
@@ -123,6 +123,44 @@ ram_model = None
kosmos_model = None
kosmos_processor = None

+ colors = [(255, 0, 0), (0, 255, 0)]
+ markers = [1, 5]
+
+ def get_point(img, sel_pix, evt: gr.SelectData):
+     img = np.array(img, dtype=np.uint8)
+     sel_pix.append(evt.index)
+     # draw points
+
+     print(sel_pix)
+     for point in sel_pix:
+         cv2.drawMarker(img, point, colors[0], markerType=markers[0], markerSize=6, thickness=2)
+     return Image.fromarray(img).convert("RGB")
+
+
+
+ def undo_button(orig_img, sel_pix):
+     temp = orig_img.copy()
+     temp = np.array(temp, dtype=np.uint8)
+     if len(sel_pix) != 0:
+         sel_pix.pop()
+     for point in sel_pix:
+         cv2.drawMarker(temp, point, colors[0], markerType=markers[0], markerSize=6, thickness=2)
+     return Image.fromarray(temp).convert("RGB")
+
+ def toggle_button(orig_img, task_type):
+     print(task_type)
+     if task_type == "segment":
+         ret = gr.Image(value= orig_img,elem_id="image_upload", type='pil', label="Upload", height=512, tool = "editor")# tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
+     elif task_type == "inpainting":
+         ret = gr.Image(value = orig_img, elem_id="image_upload", type='pil', label="Upload", height=512, tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
+     task_type = not task_type
+     return ret, task_type
+
+
+ def store_img(img):
+     print("call for store")
+     return img, [] # when new image is uploaded, `selected_points` should be empty
+
def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
    args = SLConfig.fromfile(model_config_path)
    model = build_model(args)
@@ -290,13 +328,6 @@ def set_device(args):
        device = 'cpu'
    print(f'device={device}')

- def load_groundingdino_model(device):
-     # initialize groundingdino model
-     global groundingdino_model
-     logger.info(f"initialize groundingdino model...")
-     groundingdino_model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae, device=device) #'cpu')
-     logger.info(f"initialize groundingdino model...{type(groundingdino_model)}")
-
def get_sam_vit_h_4b8939():
    if not os.path.exists('./sam_vit_h_4b8939.pth'):
        logger.info(f"get sam_vit_h_4b8939.pth...")
@@ -327,16 +358,6 @@ def load_sd_model(device):
    )
    sd_model = sd_model.to(device)

- def load_lama_cleaner_model(device):
-     # initialize lama_cleaner
-     global lama_cleaner_model
-     logger.info(f"initialize lama_cleaner...")
-
-     lama_cleaner_model = ModelManager(
-         name='lama',
-         device=device,
-     )
-
def lama_cleaner_process(image, mask, cleaner_size_limit=1080):
    try:
        logger.info(f'_______lama_cleaner_process_______1____')
@@ -413,41 +434,6 @@ def lama_cleaner_process(image, mask, cleaner_size_limit=1080):
        image = None
    return image

- class Ram_Predictor(RamPredictor):
-     def __init__(self, config, device='cpu'):
-         self.config = config
-         self.device = torch.device(device)
-         self._build_model()
-
-     def _build_model(self):
-         self.model = RamModel(**self.config.model).to(self.device)
-         if self.config.load_from is not None:
-             self.model.load_state_dict(torch.load(self.config.load_from, map_location=self.device))
-         self.model.train()
-
- def load_ram_model(device):
-     # load ram model
-     global ram_model
-     if os.environ.get('IS_MY_DEBUG') is not None:
-         return
-     model_path = "./checkpoints/ram_epoch12.pth"
-     ram_config = dict(
-         model=dict(
-             pretrained_model_name_or_path='bert-base-uncased',
-             load_pretrained_weights=False,
-             num_transformer_layer=2,
-             input_feature_size=256,
-             output_feature_size=768,
-             cls_feature_size=512,
-             num_relation_classes=56,
-             pred_type='attention',
-             loss_type='multi_label_ce',
-         ),
-         load_from=model_path,
-     )
-     ram_config = mmengine_Config(ram_config)
-     ram_model = Ram_Predictor(ram_config, device)
-
# visualization
def draw_selected_mask(mask, draw):
    color = (255, 0, 0, 153)
@@ -524,52 +510,6 @@ def concatenate_images_vertical(image1, image2):

    return new_image

- def relate_anything(input_image, k):
-     logger.info(f'relate_anything_1_{input_image.size}_')
-     w, h = input_image.size
-     max_edge = 1500
-     if w > max_edge or h > max_edge:
-         ratio = max(w, h) / max_edge
-         new_size = (int(w / ratio), int(h / ratio))
-         input_image.thumbnail(new_size)
-
-     logger.info(f'relate_anything_2_')
-     # load image
-     pil_image = input_image.convert('RGBA')
-     image = np.array(input_image)
-     sam_masks = sam_mask_generator.generate(image)
-     filtered_masks = sort_and_deduplicate(sam_masks)
-
-     logger.info(f'relate_anything_3_')
-     feat_list = []
-     for fm in filtered_masks:
-         feat = torch.Tensor(fm['feat']).unsqueeze(0).unsqueeze(0).to(device)
-         feat_list.append(feat)
-     feat = torch.cat(feat_list, dim=1).to(device)
-     matrix_output, rel_triplets = ram_model.predict(feat)
-
-     logger.info(f'relate_anything_4_')
-     pil_image_list = []
-     for i, rel in enumerate(rel_triplets[:k]):
-         s,o,r = int(rel[0]),int(rel[1]),int(rel[2])
-         relation = relation_classes[r]
-
-         mask_image = Image.new('RGBA', pil_image.size, color=(0, 0, 0, 0))
-         mask_draw = ImageDraw.Draw(mask_image)
-
-         draw_selected_mask(filtered_masks[s]['segmentation'], mask_draw)
-         draw_object_mask(filtered_masks[o]['segmentation'], mask_draw)
-
-         current_pil_image = pil_image.copy()
-         current_pil_image.alpha_composite(mask_image)
-
-         title_image = create_title_image('Red', relation, 'Blue', current_pil_image.size[0])
-         concate_pil_image = concatenate_images_vertical(current_pil_image, title_image)
-         pil_image_list.append(concate_pil_image)
-
-     logger.info(f'relate_anything_5_{len(pil_image_list)}')
-     return pil_image_list
-
mask_source_draw = "draw a mask on input image"
mask_source_segment = "type what to detect below"

@@ -584,7 +524,7 @@ def get_time_cost(run_task_time, time_cost_str):
        run_task_time = now_time
    return run_task_time, time_cost_str

- def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold,
+ def run_anything_task(input_image, input_points, origin_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold,
                       iou_threshold, inpaint_mode, mask_source_radio, remove_mode, remove_mask_extend, num_relation, kosmos_input, cleaner_size_limit=1080):

    text_prompt = getTextTrans(text_prompt, source='zh', target='en')
@@ -607,15 +547,10 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
        return None, None, time_cost_str, kosmos_image, gr.Textbox.update(visible=(time_cost_str !='')), kosmos_text, kosmos_entities

-     if (task_type == 'relate anything'):
-         output_images = relate_anything(input_image['image'], num_relation)
-         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
-         return output_images, gr.Gallery.update(label='relate images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
-
    text_prompt = text_prompt.strip()
-     if not ((task_type in ['inpainting', 'outpainting'] or task_type == 'remove') and mask_source_radio == mask_source_draw):
-         if text_prompt == '':
-             return [], gr.Gallery.update(label='Detection prompt is not found!😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
+     # if not ((task_type in ['inpainting', 'outpainting'] or task_type == 'remove') and mask_source_radio == mask_source_draw):
+     #     if text_prompt == '':
+     #         return [], gr.Gallery.update(label='Detection prompt is not found!😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None

    if input_image is None:
        return [], gr.Gallery.update(label='Please upload a image!😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
@@ -649,30 +584,6 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
        pass
    else:
        groundingdino_device = 'cpu'
-         if device != 'cpu':
-             try:
-                 from groundingdino import _C
-                 groundingdino_device = 'cuda:0'
-             except:
-                 warnings.warn("Failed to load custom C++ ops. Running on CPU mode Only in groundingdino!")
-
-     boxes_filt, pred_phrases = get_grounding_output(
-         groundingdino_model, image, text_prompt, box_threshold, text_threshold, device=groundingdino_device
-     )
-     if boxes_filt.size(0) == 0:
-         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_[{text_prompt}]_1___{groundingdino_device}/[No objects detected, please try others.]_')
-         return [], gr.Gallery.update(label='No objects detected, please try others.😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
-     boxes_filt_ori = copy.deepcopy(boxes_filt)
-
-     pred_dict = {
-         "boxes": boxes_filt,
-         "size": [size[1], size[0]], # H,W
-         "labels": pred_phrases,
-     }
-
-     image_with_box = plot_boxes_to_image(copy.deepcopy(image_pil), pred_dict)[0]
-     output_images.append(image_with_box)
-     run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)

    logger.info(f'run_anything_task_[{file_temp}]_{task_type}_2_')
    if task_type == 'segment' or ((task_type in ['inpainting', 'outpainting'] or task_type == 'remove') and mask_source_radio == mask_source_segment):
@@ -680,37 +591,24 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
        if sam_predictor:
            sam_predictor.set_image(image)

-         for i in range(boxes_filt.size(0)):
-             boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H])
-             boxes_filt[i][:2] -= boxes_filt[i][2:] / 2
-             boxes_filt[i][2:] += boxes_filt[i][:2]
-
        if sam_predictor:
-             boxes_filt = boxes_filt.to(sam_device)
-             transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes_filt, image.shape[:2])
-
-             masks, _, _, _ = sam_predictor.predict_torch(
-                 point_coords = None,
-                 point_labels = None,
+             logger.info(f"Forward with: {input_points}")
+             masks, _, _, _ = sam_predictor.predict(
+                 point_coords = np.array(input_points),
+                 point_labels = np.array([1 for _ in range(len(input_points))]),
                # boxes = transformed_boxes,
                multimask_output = False,
            )
            # masks: [9, 1, 512, 512]
            assert sam_checkpoint, 'sam_checkpoint is not found!'
        else:
-             masks = torch.zeros(len(boxes_filt), 1, H, W)
-             mask_count = 0
-             for box in boxes_filt:
-                 masks[mask_count, 0, int(box[1]):int(box[3]), int(box[0]):int(box[2])] = 1
-                 mask_count += 1
-             masks = torch.where(masks > 0, True, False)
            run_mode = "rectangle"

        # draw output image
        plt.figure(figsize=(10, 10))
-         plt.imshow(image)
+         plt.imshow(origin_image)
        for mask in masks:
-             show_mask(mask.cpu().numpy(), plt.gca(), random_color=True)
+             show_mask(mask, plt.gca(), random_color=True)
        # for box, label in zip(boxes_filt, pred_phrases):
        #     show_box(box.cpu().numpy(), plt.gca(), label)
        plt.axis('off')
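A note on the hunk above: sam_predictor.predict (the numpy-facing API) returns masks as numpy arrays, unlike predict_torch, which returns torch tensors; that is presumably why the .cpu().numpy() call is dropped from the show_mask line. show_mask itself is the small matplotlib overlay helper used elsewhere in the repo; a minimal version, assumed here to match the helper from the SAM example notebooks and shown only for reference, looks like:

import numpy as np

def show_mask(mask, ax, random_color=False):
    # Overlay a binary H x W mask on a matplotlib axis as a translucent color layer.
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    else:
        color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
    ax.imshow(mask_image)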
@@ -760,35 +658,7 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
            image_inpainting = sd_model(prompt=inpaint_prompt, image=image_source_for_inpaint, mask_image=image_mask_for_inpaint).images[0]
        else:
            # remove from mask
-             if mask_source_radio == mask_source_segment:
-                 mask_imgs = []
-                 masks_shape = masks_ori.shape
-                 boxes_filt_ori_array = boxes_filt_ori.numpy()
-                 if inpaint_mode == 'merge':
-                     extend_shape_0 = masks_shape[0]
-                     extend_shape_1 = masks_shape[1]
-                 else:
-                     extend_shape_0 = 1
-                     extend_shape_1 = 1
-                 for i in range(extend_shape_0):
-                     for j in range(extend_shape_1):
-                         mask = masks_ori[i][j].cpu().numpy()
-                         mask_pil = Image.fromarray(mask)
-                         if remove_mode == 'segment':
-                             useRectangle = False
-                         else:
-                             useRectangle = True
-                         try:
-                             remove_mask_extend = int(remove_mask_extend)
-                         except:
-                             remove_mask_extend = 10
-                         mask_pil_exp = mask_extend(copy.deepcopy(mask_pil).convert("RGB"),
-                                                    xywh_to_xyxy(torch.tensor(boxes_filt_ori_array[i]), W, H),
-                                                    extend_pixels=remove_mask_extend, useRectangle=useRectangle)
-                         mask_imgs.append(mask_pil_exp)
-                 mask_pil = mix_masks(mask_imgs)
-                 output_images.append(mask_pil.convert("RGB"))
-                 run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
+             aasds = 1

            logger.info(f'run_anything_task_[{file_temp}]_{task_type}_6_')
            image_inpainting = lama_cleaner_process(np.array(image_pil), np.array(mask_pil.convert("L")), cleaner_size_limit)
@@ -810,7 +680,7 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
    logger.info(f'run_anything_task_[{file_temp}]_9_9_')
    return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None

- def change_radio_display(task_type, mask_source_radio):
+ def change_radio_display(task_type, mask_source_radio, orig_img):
    text_prompt_visible = True
    inpaint_prompt_visible = False
    mask_source_radio_visible = False
@@ -830,7 +700,7 @@ def change_radio_display(task_type, mask_source_radio):
    kosmos_text_output_visible = True

    if task_type in ['inpainting', 'outpainting']:
-         inpaint_prompt_visible = True
+         inpaint_prompt_visible = False
    if task_type in ['inpainting', 'outpainting'] or task_type == "remove":
        mask_source_radio_visible = True
        if mask_source_radio == mask_source_draw:
@@ -838,7 +708,11 @@ def change_radio_display(task_type, mask_source_radio):
    if task_type == "relate anything":
        text_prompt_visible = False
        num_relation_visible = True
-
+     if task_type == "segment":
+         ret = gr.Image(value= orig_img, elem_id="image_upload", type='pil', label="Upload", height=512, tool = "editor")# tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
+     elif task_type == "inpainting":
+         ret = gr.Image(value = orig_img, elem_id="image_upload", type='pil', label="Upload", height=512, tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
+
    return (gr.Textbox.update(visible=text_prompt_visible),
            gr.Textbox.update(visible=inpaint_prompt_visible),
            gr.Radio.update(visible=mask_source_radio_visible),
@@ -846,7 +720,8 @@ def change_radio_display(task_type, mask_source_radio):
            gr.Gallery.update(visible=image_gallery_visible),
            gr.Radio.update(visible=kosmos_input_visible),
            gr.Image.update(visible=kosmos_output_visible),
-             gr.HighlightedText.update(visible=kosmos_text_output_visible))
+             gr.HighlightedText.update(visible=kosmos_text_output_visible),
+             ret, [], gr.Button("Undo point", visible = task_type == "segment"))

def get_model_device(module):
    try:
@@ -869,29 +744,39 @@ def click_callback(coords):

def main_gradio(args):
    block = gr.Blocks(
-         title="SAM and others",
-         theme="shivi/calm_seafoam@>=0.0.1,<1.0.0",
+         title="Thesis-Demo",
+         # theme="shivi/calm_seafoam@>=0.0.1,<1.0.0",
    )
    with block:
        with gr.Row():
            with gr.Column():
+                 selected_points = gr.State([])
+                 original_image = gr.State()
                task_types = ["segment"]
-                 # if sam_enable:
-                 #     task_types.append("segment")
                if inpainting_enable:
                    task_types.append("inpainting")
-                 # task_types.append("outpainting")
-                 # if lama_cleaner_enable:
-                 #     task_types.append("remove")
-                 # if ram_enable:
-                 #     task_types.append("relate anything")
-                 # if kosmos_enable:
-                 #     task_types.append("Kosmos-2")
-                 #     task_types.append("inpainting")


-                 input_image = gr.Image(source='upload', elem_id="image_upload", tool='sketch', type='pil', label="Upload",
-                                        height=512, brush_color='#00FFFF', mask_opacity=0.6)
+                 input_image = gr.Image(elem_id="image_upload", type='pil', label="Upload", height=512)
+
+                 input_image.upload(
+                     store_img,
+                     [input_image],
+                     [original_image, selected_points]
+                 )
+
+                 input_image.select(
+                     get_point,
+                     [input_image, selected_points],
+                     [input_image]
+                 )
+
+                 undo_point_button = gr.Button("Undo point")
+                 undo_point_button.click(
+                     fn= undo_button,
+                     inputs=[original_image, selected_points],
+                     outputs=[input_image]
+                 )
                print(dir(input_image))
                task_type = gr.Radio(task_types, value="segment",
                                     label='Task type', visible=True)
@@ -956,15 +841,15 @@ def main_gradio(args):
        selected.change(update_output_image, [kosmos_output, kosmos_output, entity_output, selected], [kosmos_output])

        run_button.click(fn=run_anything_task, inputs=[
-             input_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold,
+             input_image, selected_points, original_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold,
            iou_threshold, inpaint_mode, mask_source_radio, remove_mode, remove_mask_extend, num_relation, kosmos_input],
            outputs=[image_gallery, image_gallery, time_cost, time_cost, kosmos_output, kosmos_text_output, entity_output], show_progress=True, queue=True)

-         mask_source_radio.change(fn=change_radio_display, inputs=[task_type, mask_source_radio],
+         mask_source_radio.change(fn=change_radio_display, inputs=[task_type, mask_source_radio, original_image],
            outputs=[text_prompt, inpaint_prompt, mask_source_radio, num_relation])
-         task_type.change(fn=change_radio_display, inputs=[task_type, mask_source_radio],
+         task_type.change(fn=change_radio_display, inputs=[task_type, mask_source_radio, original_image],
            outputs=[text_prompt, inpaint_prompt, mask_source_radio, num_relation,
-                      image_gallery, kosmos_input, kosmos_output, kosmos_text_output
+                      image_gallery, kosmos_input, kosmos_output, kosmos_text_output, input_image, selected_points, undo_point_button
            ])

        # DESCRIPTION = f'### This demo from [Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything). <br>'
@@ -1001,17 +886,17 @@ if __name__ == "__main__":
    if device == 'cpu':
        kosmos_enable = False

-     if kosmos_enable:
-         kosmos_model, kosmos_processor = load_kosmos_model(device)
+     # if kosmos_enable:
+     #     kosmos_model, kosmos_processor = load_kosmos_model(device)

-     if groundingdino_enable:
-         load_groundingdino_model('cpu')
+     # if groundingdino_enable:
+     #     load_groundingdino_model('cpu')

    if sam_enable:
        load_sam_model(device)

-     if inpainting_enable:
-         load_sd_model(device)
+     # if inpainting_enable:
+     #     load_sd_model(device)

    # if lama_cleaner_enable:
    #     load_lama_cleaner_model(device)