Spaces:

yizhangliu
/

Grounded-Segment-Anything

Starting on T4

App Files Files Community

liuyizhang committed on Nov 17, 2023

Commit

82b6069

1 Parent(s): c2a6c29

support gradio & api

Browse files

Files changed (3) hide show

api_client.py +69 -0
app.py +197 -65
requirements.txt +1 -5

api_client.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import requests, json
+from PIL import Image
+import matplotlib.pyplot as plt
+import numpy as np
+import base64
+import io
+def request_post(url, data, timeout=600, headers = None):
+    """POST ``data`` as a JSON body to ``url`` and return the decoded reply.
+
+    Args:
+        url: Endpoint the request is sent to.
+        data: JSON-serializable payload; encoded with ``json.dumps``.
+        timeout: Timeout in seconds forwarded to ``requests.post``.
+        headers: Optional request headers. When ``None``, defaults that
+            announce a UTF-8 JSON body are used.
+
+    Returns:
+        The parsed JSON response, or ``None`` when sending or decoding
+        fails (the error, URL, payload and response are printed).
+    """
+    if headers is None:
+        headers = {
+            'Accept': '*/*',  # accept any type of response data
+            'Content-Type': 'application/json;charset=UTF-8',  # the body sent is JSON
+        }
+    # Bind the name before the try block: if requests.post() itself raises,
+    # the error report below must not fail with UnboundLocalError.
+    response = None
+    try:
+        response = requests.post(url=url, headers=headers, data=json.dumps(data), timeout=timeout)
+        return response.json()
+    except Exception as e:
+        print('request_post[Error]:' + str(e))
+        print(f'url: {url}')
+        print(f'data: {data}')
+        print(f'response: {response}')
+        return None
+# Endpoint of the API server on the local machine (port 7860). The path
+# spelling "imgCLeaner" has to match the route the server registers.
+url = "http://127.0.0.1:7860/imgCLeaner"
+def imgFile_to_base64(image_file):
+    """Read ``image_file`` in binary mode and return its base64 text."""
+    with open(image_file, "rb") as handle:
+        raw = handle.read()
+    return base64.b64encode(raw).decode("utf8")
+def base64_to_bytes(im_b64):
+    """Decode the base64 text ``im_b64`` back into raw bytes."""
+    return base64.b64decode(im_b64.encode("utf-8"))
+def base64_to_PILImage(im_b64):
+    """Open the base64-encoded image ``im_b64`` and return the image object."""
+    buffer = io.BytesIO(base64_to_bytes(im_b64))
+    return Image.open(buffer)
+# Example client run: send one image to the API and display every image
+# contained in the reply.
+image_file = 'dog.png'
+data = {
+    'remove_texts': "小狗 . 椅子",  # objects to remove: "puppy . chair"
+    'extend': 20,
+    'img': imgFile_to_base64(image_file),
+}
+ret = request_post(url, data, timeout=600, headers=None)
+# request_post returns None on failure, and an error reply carries no
+# 'result' entry; stop with a clear message instead of a TypeError/KeyError.
+if not isinstance(ret, dict) or 'result' not in ret:
+    raise SystemExit(f'request failed, response: {ret}')
+result_imgs = ret['result']['imgs']
+print(len(result_imgs))
+for img in result_imgs:
+    pilImage = base64_to_PILImage(img)
+    plt.imshow(pilImage)
+    plt.show()
+    plt.clf()

app.py CHANGED Viewed

@@ -120,7 +120,6 @@ ram_model = None
 kosmos_model = None
 kosmos_processor = None
 def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
     args = SLConfig.fromfile(model_config_path)
     model = build_model(args)
@@ -621,7 +620,8 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     size = image_pil.size
     # run grounding dino model
     if (task_type == 'inpainting' or task_type == 'remove') and mask_source_radio == mask_source_draw:
         pass
@@ -655,25 +655,35 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_2_')
     if task_type == 'segment' or ((task_type == 'inpainting' or task_type == 'remove') and mask_source_radio == mask_source_segment):
         image = np.array(input_img)
-        sam_predictor.set_image(image)
-        H, W = size[1], size[0]
         for i in range(boxes_filt.size(0)):
             boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H])
             boxes_filt[i][:2] -= boxes_filt[i][2:] / 2
             boxes_filt[i][2:] += boxes_filt[i][:2]
-        boxes_filt = boxes_filt.to(sam_device)
-        transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes_filt, image.shape[:2])
-        masks, _, _, _ = sam_predictor.predict_torch(
-            point_coords = None,
-            point_labels = None,
-            boxes = transformed_boxes,
-            multimask_output = False,
-        )
-        # masks: [9, 1, 512, 512]
-        assert sam_checkpoint, 'sam_checkpoint is not found!'
         # draw output image
         plt.figure(figsize=(10, 10))
         plt.imshow(image)
@@ -686,7 +696,7 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
         plt.savefig(image_path, bbox_inches="tight")
         segment_image_result = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
         os.remove(image_path)
-        output_images.append(segment_image_result)
         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_3_')
@@ -705,9 +715,9 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
             masks_ori = copy.deepcopy(masks)
             if inpaint_mode == 'merge':
                 masks = torch.sum(masks, dim=0).unsqueeze(0)
-                masks = torch.where(masks > 0, True, False)
             mask = masks[0][0].cpu().numpy()
-            mask_pil = Image.fromarray(mask)
         output_images.append(mask_pil.convert("RGB"))
         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
@@ -718,7 +728,6 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
             image_inpainting = sd_model(prompt=inpaint_prompt, image=image_source_for_inpaint, mask_image=image_mask_for_inpaint).images[0]
         else:
             # remove from mask
-            logger.info(f'run_anything_task_[{file_temp}]_{task_type}_5_')
             if mask_source_radio == mask_source_segment:
                 mask_imgs = []
                 masks_shape = masks_ori.shape
@@ -732,19 +741,17 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
                 for i in range(extend_shape_0):
                     for j in range(extend_shape_1):
                         mask = masks_ori[i][j].cpu().numpy()
-                        mask_pil = Image.fromarray(mask)
                         if remove_mode == 'segment':
                             useRectangle = False
                         else:
                             useRectangle = True
                         try:
                             remove_mask_extend = int(remove_mask_extend)
                         except:
                             remove_mask_extend = 10
                         mask_pil_exp = mask_extend(copy.deepcopy(mask_pil).convert("RGB"),
-                                        xywh_to_xyxy(torch.tensor(boxes_filt_ori_array[i]), size[0], size[1]),
                                         extend_pixels=remove_mask_extend, useRectangle=useRectangle)
                         mask_imgs.append(mask_pil_exp)
                 mask_pil = mix_masks(mask_imgs)
@@ -820,48 +827,7 @@ def get_model_device(module):
     except Exception as e:
         return 'Error'
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser("Grounded SAM demo", add_help=True)
-    parser.add_argument("--debug", action="store_true", help="using debug mode")
-    parser.add_argument("--share", action="store_true", help="share the app")
-    args, _ = parser.parse_known_args()
-    print(f'args = {args}')
-    if os.environ.get('IS_MY_DEBUG') is None:
-        os.system("pip list")
-    device = set_device()
-    if device == 'cpu':
-        kosmos_enable = False
-    if kosmos_enable:
-        kosmos_model, kosmos_processor = load_kosmos_model(device)
-    if groundingdino_enable:
-        groundingdino_model = load_groundingdino_model('cpu')
-    if sam_enable:
-        load_sam_model(device)
-    if inpainting_enable:
-        load_sd_model(device)
-    if lama_cleaner_enable:
-        load_lama_cleaner_model(device)
-    if ram_enable:
-        load_ram_model(device)
-    if os.environ.get('IS_MY_DEBUG') is None:
-        os.system("pip list")
-    # print(f'groundingdino_model__{get_model_device(groundingdino_model)}')
-    # print(f'sam_model__{get_model_device(sam_model)}')
-    # print(f'sd_model__{get_model_device(sd_model)}')
-    # print(f'lama_cleaner_model__{get_model_device(lama_cleaner_model)}')
-    # print(f'ram_model__{get_model_device(ram_model)}')
-    # print(f'kosmos_model__{get_model_device(kosmos_model)}')
     block = gr.Blocks().queue()
     with block:
         with gr.Row():
@@ -968,5 +934,171 @@ if __name__ == "__main__":
     print(f'device = {device}')
     print(f'torch.cuda.is_available = {torch.cuda.is_available()}')
     computer_info()
-    block.launch(server_name='0.0.0.0', debug=args.debug, share=args.share)

 kosmos_model = None
 kosmos_processor = None
 def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
     args = SLConfig.fromfile(model_config_path)
     model = build_model(args)
         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     size = image_pil.size
+    H, W = size[1], size[0]
     # run grounding dino model
     if (task_type == 'inpainting' or task_type == 'remove') and mask_source_radio == mask_source_draw:
         pass
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_2_')
     if task_type == 'segment' or ((task_type == 'inpainting' or task_type == 'remove') and mask_source_radio == mask_source_segment):
         image = np.array(input_img)
+        if sam_predictor:
+            sam_predictor.set_image(image)
         for i in range(boxes_filt.size(0)):
             boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H])
             boxes_filt[i][:2] -= boxes_filt[i][2:] / 2
             boxes_filt[i][2:] += boxes_filt[i][:2]
+        if sam_predictor:
+            boxes_filt = boxes_filt.to(sam_device)
+            transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes_filt, image.shape[:2])
+            masks, _, _, _ = sam_predictor.predict_torch(
+                point_coords = None,
+                point_labels = None,
+                boxes = transformed_boxes,
+                multimask_output = False,
+            )
+            # masks: [9, 1, 512, 512]
+            assert sam_checkpoint, 'sam_checkpoint is not found!'
+        else:
+            masks = torch.zeros(len(boxes_filt), 1, H, W)
+            mask_count = 0
+            for box in boxes_filt:
+                masks[mask_count, 0, int(box[1]):int(box[3]), int(box[0]):int(box[2])] = 1
+                mask_count += 1
+            masks = torch.where(masks > 0, True, False)
+            run_mode = "rectangle"
         # draw output image
         plt.figure(figsize=(10, 10))
         plt.imshow(image)
         plt.savefig(image_path, bbox_inches="tight")
         segment_image_result = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
         os.remove(image_path)
+        output_images.append(Image.fromarray(segment_image_result))
         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_3_')
             masks_ori = copy.deepcopy(masks)
             if inpaint_mode == 'merge':
                 masks = torch.sum(masks, dim=0).unsqueeze(0)
+            masks = torch.where(masks > 0, True, False)
             mask = masks[0][0].cpu().numpy()
+            mask_pil = Image.fromarray(mask)
         output_images.append(mask_pil.convert("RGB"))
         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
             image_inpainting = sd_model(prompt=inpaint_prompt, image=image_source_for_inpaint, mask_image=image_mask_for_inpaint).images[0]
         else:
             # remove from mask
             if mask_source_radio == mask_source_segment:
                 mask_imgs = []
                 masks_shape = masks_ori.shape
                 for i in range(extend_shape_0):
                     for j in range(extend_shape_1):
                         mask = masks_ori[i][j].cpu().numpy()
+                        mask_pil = Image.fromarray(mask)
                         if remove_mode == 'segment':
                             useRectangle = False
                         else:
                             useRectangle = True
                         try:
                             remove_mask_extend = int(remove_mask_extend)
                         except:
                             remove_mask_extend = 10
                         mask_pil_exp = mask_extend(copy.deepcopy(mask_pil).convert("RGB"),
+                                        xywh_to_xyxy(torch.tensor(boxes_filt_ori_array[i]), W, H),
                                         extend_pixels=remove_mask_extend, useRectangle=useRectangle)
                         mask_imgs.append(mask_pil_exp)
                 mask_pil = mix_masks(mask_imgs)
     except Exception as e:
         return 'Error'
+def main_gradio(args):
     block = gr.Blocks().queue()
     with block:
         with gr.Row():
     print(f'device = {device}')
     print(f'torch.cuda.is_available = {torch.cuda.is_available()}')
     computer_info()
+    block.launch(server_name='0.0.0.0', server_port=args.port, debug=args.debug, share=args.share)
+import signal
+import json
+from datetime import date, datetime, timedelta
+from gevent import pywsgi
+import base64
+def imgFile_to_base64(image_file):
+    """Return the binary contents of ``image_file`` as a base64 string."""
+    with open(image_file, "rb") as src:
+        encoded = base64.b64encode(src.read())
+    return encoded.decode("utf8")
+def base64_to_bytes(im_b64):
+    """Turn the base64 string ``im_b64`` into the bytes it encodes."""
+    ascii_payload = im_b64.encode("utf-8")
+    return base64.b64decode(ascii_payload)
+def base64_to_PILImage(im_b64):
+    """Decode the base64 string ``im_b64`` and open it as an image."""
+    return Image.open(io.BytesIO(base64_to_bytes(im_b64)))
+class API_Starter:
+    """Flask application exposing the object-removal task at ``/imgCLeaner``.
+
+    Relies on module-level names defined elsewhere in this file (``args``,
+    ``logger``, ``run_anything_task``, ``output_dir``).
+    """
+
+    def __init__(self):
+        """Build the Flask app, register the route, add a log file and a SIGINT handler."""
+        from flask import Flask, request, jsonify
+        from flask_cors import CORS, cross_origin
+        import logging
+        app = Flask(__name__)
+        app.logger.setLevel(logging.ERROR)
+        CORS(app, supports_credentials=True, resources={r"/*": {"origins": "*"}})
+        @app.route('/imgCLeaner', methods=['GET', 'POST'])
+        @cross_origin()
+        def processAssist():
+            # Only POST carries a payload; anything else is rejected.
+            if request.method != 'POST':
+                return jsonify({'code': -1, 'reason':'no support to get'})
+            # Report a malformed body as an error reply instead of letting
+            # the exception surface as an unhandled server error.
+            try:
+                data = json.loads(request.data.decode('utf-8'))
+            except ValueError as e:
+                return jsonify({'code': -1, 'reason': f'invalid json body: {e}'})
+            if not isinstance(data, dict) or 'img' not in data or 'remove_texts' not in data:
+                return jsonify({'code': -1, 'reason': "'img' and 'remove_texts' are required"})
+            result = self.handle_data(data)
+            return jsonify({'code': 0, 'result': result})
+        self.app = app
+        now_time = datetime.now().strftime('%Y%m%d_%H%M%S')
+        logger.add(f'./logs/logger_[{args.port}]_{now_time}.log')
+        signal.signal(signal.SIGINT, self.signal_handler)
+
+    def handle_data(self, data):
+        """Run the 'remove' task on one request and return the result images.
+
+        Args:
+            data: Request payload. ``data['img']`` is a base64-encoded image,
+                ``data['remove_texts']`` is the text prompt, and the optional
+                ``data['extend']`` is the mask extension in pixels (default 10).
+
+        Returns:
+            ``{'imgs': [...]}`` with each output image base64-encoded.
+        """
+        img = base64_to_PILImage(data['img'])
+        # Honour the client's 'extend' value; it was previously ignored in
+        # favour of a fixed "10". Passed as a string like the old literal.
+        remove_mask_extend = str(data.get('extend', 10))
+        results = run_anything_task(input_image = img,
+                            text_prompt = data['remove_texts'],
+                            task_type = 'remove',
+                            inpaint_prompt = '',
+                            box_threshold = 0.3,
+                            text_threshold = 0.25,
+                            iou_threshold = 0.8,
+                            inpaint_mode = "merge",
+                            # presumably the label of the "segment" mask source — verify
+                            mask_source_radio = "type what to detect below",
+                            remove_mode = "rectangle",   # ["segment", "rectangle"]
+                            remove_mask_extend = remove_mask_extend,
+                            num_relation = 5,
+                            kosmos_input = None,
+                            cleaner_size_limit = -1,
+                            )
+        output_images = results[0]
+        ret_json_images = []
+        file_temp = int(time.time())
+        for count, image_pil in enumerate(output_images):
+            # Objects without a usable ``format`` attribute fall back to PNG.
+            try:
+                img_format = image_pil.format.lower()
+            except AttributeError:
+                img_format = 'png'
+            image_path = os.path.join(output_dir, f"api_images_{file_temp}_{count}.{img_format}")
+            try:
+                try:
+                    image_pil.save(image_path)
+                except Exception:
+                    # Best effort: the item may be an array rather than an image object.
+                    Image.fromarray(image_pil).save(image_path)
+                ret_json_images.append(imgFile_to_base64(image_path))
+            finally:
+                # Never leave the temporary file behind, even when encoding fails.
+                if os.path.exists(image_path):
+                    os.remove(image_path)
+        return {
+            'imgs': ret_json_images,
+            }
+
+    def signal_handler(self, signal, frame):
+        """Exit the process when SIGINT (Ctrl+C) is received."""
+        print('\nSignal Catched! You have just type Ctrl+C!')
+        sys.exit(0)
+
+    def run(self):
+        """Serve the Flask app with gevent's WSGI server on ``args.port`` until stopped."""
+        from gevent import pywsgi
+        logger.info(f'\nargs={args}\n')
+        computer_info()
+        server = pywsgi.WSGIServer(('0.0.0.0', args.port), self.app)
+        server.serve_forever()
+def main_api(args):
+    """Start the API service, or print a hint when ``args.port`` is 0."""
+    if args.port != 0:
+        API_Starter().run()
+        return
+    print('Please give valid port!')
+if __name__ == "__main__":
+    # Entry point: parse the command line, load every enabled model, then
+    # start either the gradio UI or the HTTP API.
+    parser = argparse.ArgumentParser("Grounded SAM demo", add_help=True)
+    parser.add_argument("--debug", action="store_true", help="using debug mode")
+    parser.add_argument("--share", action="store_true", help="share the app")
+    parser.add_argument("--port", "-p", type=int, default=7860, help="port")
+    args, _ = parser.parse_known_args()
+    print(f'args = {args}')
+    # Dump the installed packages unless IS_MY_DEBUG is set.
+    if os.environ.get('IS_MY_DEBUG') is None:
+        os.system("pip list")
+    device = set_device()
+    if device == 'cpu':
+        kosmos_enable = False
+    if kosmos_enable:
+        kosmos_model, kosmos_processor = load_kosmos_model(device)
+    if groundingdino_enable:
+        groundingdino_model = load_groundingdino_model('cpu')
+    if sam_enable:
+        load_sam_model(device)
+    if inpainting_enable:
+        load_sd_model(device)
+    if lama_cleaner_enable:
+        load_lama_cleaner_model(device)
+    if ram_enable:
+        load_ram_model(device)
+    if os.environ.get('IS_MY_DEBUG') is None:
+        os.system("pip list")
+    # print(f'groundingdino_model__{get_model_device(groundingdino_model)}')
+    # print(f'sam_model__{get_model_device(sam_model)}')
+    # print(f'sd_model__{get_model_device(sd_model)}')
+    # print(f'lama_cleaner_model__{get_model_device(lama_cleaner_model)}')
+    # print(f'ram_model__{get_model_device(ram_model)}')
+    # print(f'kosmos_model__{get_model_device(kosmos_model)}')
+    if os.environ.get('IS_MY_DEBUG') is None:
+        # Provide gradio services
+        main_gradio(args)
+    else:
+        # Provide API services. The former constant `if 0 == 0` switch always
+        # took this path, so its unreachable gradio branch was dropped.
+        main_api(args)

requirements.txt CHANGED Viewed

@@ -15,14 +15,10 @@ setuptools
 supervision
 termcolor
 timm
-# torch
-# torchvision
 torch==2.0.0
 torchvision==0.15.1
-# torch==2.1.0
-# torchvision==0.16.0
 yapf
 numba
 scipy

 supervision
 termcolor
 timm
 torch==2.0.0
 torchvision==0.15.1
+gevent
 yapf
 numba
 scipy