mart9992
/

theimageconvert2

ONNX

Inference Endpoints

Model card Files Files and versions Community

mart9992 committed on Jan 7, 2024

Commit

79fc7ef

•

1 Parent(s): 5553910

Update handler.py

Browse files

Files changed (1) hide show

handler.py +13 -441

handler.py CHANGED Viewed

@@ -2,7 +2,7 @@ import string
 import warnings
 warnings.filterwarnings('ignore')
 import subprocess, io, os, sys, time
-from dw_pose.main import dwpose
 # os.environ["XFORMERS_DISABLE_FLASH_ATTN"] = "1"
 # result = subprocess.run(['pip', 'install', 'xformers'], check=True)
@@ -77,19 +77,6 @@ kosmos_enable = False
 # qwen_enable = True
 # from qwen_utils import *
-if os.environ.get('IS_MY_DEBUG') is not None:
-    sam_enable = False
-    ram_enable = False
-    inpainting_enable = False
-    kosmos_enable = False
-if lama_cleaner_enable:
-    try:
-        from lama_cleaner.model_manager import ModelManager
-        from lama_cleaner.schema import Config as lama_Config
-    except Exception as e:
-        lama_cleaner_enable = False
 # segment anything
 from segment_anything import build_sam, SamPredictor, SamAutomaticMaskGenerator
@@ -98,34 +85,8 @@ import PIL
 import requests
 import torch
 from io import BytesIO
-from diffusers import StableDiffusionInpaintPipeline
 from huggingface_hub import hf_hub_download
-from util_computer import computer_info
-# relate anything
-from ram_utils import iou, sort_and_deduplicate, relation_classes, MLP, show_anns, ram_show_mask
-from ram_train_eval import RamModel, RamPredictor
-from mmengine.config import Config as mmengine_Config
-if lama_cleaner_enable:
-    from lama_cleaner.helper import (
-        load_img,
-        numpy_to_bytes,
-        resize_max_size,
-    )
-# from transformers import AutoProcessor, AutoModelForVision2Seq
-import ast
-if kosmos_enable and install_stuff:
-    os.system("pip install transformers@git+https://github.com/huggingface/transformers.git@main")
-    # os.system("pip install transformers==4.32.0")
-from kosmos_utils import *
-from util_tencent import getTextTrans
 config_file = 'GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py'
 ckpt_repo_id = "ShilongLiu/GroundingDINO"
 ckpt_filenmae = "groundingdino_swint_ogc.pth"
@@ -133,10 +94,7 @@ sam_checkpoint = './sam_hq_vit_h.pth'
 output_dir = "outputs"
 device = 'cpu'
-os.makedirs(output_dir, exist_ok=True)
-groundingdino_model = None
 sam_device = "cuda"
-sam_model = None
 def get_sam_vit_h_4b8939():
@@ -150,20 +108,20 @@ def get_sam_vit_h_4b8939():
             f.write(response.content)
         print('Downloaded sam_vit_h_4b8939.pth')
-get_sam_vit_h_4b8939()
 logger.info(f"initialize SAM model...")
 sam_device = "cuda"
-sam_model = build_sam(checkpoint=sam_checkpoint).to(sam_device)
-sam_predictor = SamPredictor(sam_model)
-sam_mask_generator = SamAutomaticMaskGenerator(sam_model)
-sam_mask_generator = None
 sd_model = None
 lama_cleaner_model= None
 ram_model = None
 kosmos_model = None
 kosmos_processor = None
 def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
     args = SLConfig.fromfile(model_config_path)
     model = build_model(args)
@@ -324,167 +282,6 @@ def mix_masks(imgs):
     re_img =  1 - re_img
     return  Image.fromarray(np.uint8(255*re_img))
-def set_device():
-    if os.environ.get('IS_MY_DEBUG') is None:
-        device = 'cuda' if torch.cuda.is_available() else 'cpu'
-    else:
-        device = 'cpu'
-    print(f'device={device}')
-    return device
-def load_groundingdino_model(device):
-    # initialize groundingdino model
-    logger.info(f"initialize groundingdino model...")
-    groundingdino_model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae, device=device) #'cpu')
-    return groundingdino_model
-def load_sam_model(device):
-    # initialize SAM
-    global sam_model, sam_predictor, sam_mask_generator, sam_device
-    get_sam_vit_h_4b8939()
-    logger.info(f"initialize SAM model...")
-    sam_device = device
-    sam_model = build_sam(checkpoint=sam_checkpoint).to(sam_device)
-    sam_predictor = SamPredictor(sam_model)
-    sam_mask_generator = SamAutomaticMaskGenerator(sam_model)
-def load_sd_model(device):
-    # initialize stable-diffusion-inpainting
-    global sd_model
-    logger.info(f"initialize stable-diffusion-inpainting...")
-    sd_model = None
-    if os.environ.get('IS_MY_DEBUG') is None:
-        sd_model = StableDiffusionInpaintPipeline.from_pretrained(
-                "runwayml/stable-diffusion-inpainting",
-                revision="fp16",
-                # "stabilityai/stable-diffusion-2-inpainting",
-                torch_dtype=torch.float16,
-        )
-        sd_model = sd_model.to(device)
-def load_lama_cleaner_model(device):
-    # initialize lama_cleaner
-    global lama_cleaner_model
-    logger.info(f"initialize lama_cleaner...")
-    lama_cleaner_model = ModelManager(
-            name='lama',
-            device=device,
-        )
-def lama_cleaner_process(image, mask, cleaner_size_limit=1080):
-    try:
-        logger.info(f'_______lama_cleaner_process_______1____')
-        ori_image = image
-        if mask.shape[0] == image.shape[1] and mask.shape[1] == image.shape[0] and mask.shape[0] != mask.shape[1]:
-            # rotate image
-            logger.info(f'_______lama_cleaner_process_______2____')
-            ori_image = np.transpose(image[::-1, ...][:, ::-1], axes=(1, 0, 2))[::-1, ...]
-            logger.info(f'_______lama_cleaner_process_______3____')
-            image = ori_image
-        logger.info(f'_______lama_cleaner_process_______4____')
-        original_shape = ori_image.shape
-        logger.info(f'_______lama_cleaner_process_______5____')
-        interpolation = cv2.INTER_CUBIC
-        size_limit = cleaner_size_limit
-        if size_limit == -1:
-            logger.info(f'_______lama_cleaner_process_______6____')
-            size_limit = max(image.shape)
-        else:
-            logger.info(f'_______lama_cleaner_process_______7____')
-            size_limit = int(size_limit)
-        logger.info(f'_______lama_cleaner_process_______8____')
-        config = lama_Config(
-            ldm_steps=25,
-            ldm_sampler='plms',
-            zits_wireframe=True,
-            hd_strategy='Original',
-            hd_strategy_crop_margin=196,
-            hd_strategy_crop_trigger_size=1280,
-            hd_strategy_resize_limit=2048,
-            prompt='',
-            use_croper=False,
-            croper_x=0,
-            croper_y=0,
-            croper_height=512,
-            croper_width=512,
-            sd_mask_blur=5,
-            sd_strength=0.75,
-            sd_steps=50,
-            sd_guidance_scale=7.5,
-            sd_sampler='ddim',
-            sd_seed=42,
-            cv2_flag='INPAINT_NS',
-            cv2_radius=5,
-        )
-        logger.info(f'_______lama_cleaner_process_______9____')
-        if config.sd_seed == -1:
-            config.sd_seed = random.randint(1, 999999999)
-        # logger.info(f"Origin image shape_0_: {original_shape} / {size_limit}")
-        logger.info(f'_______lama_cleaner_process_______10____')
-        image = resize_max_size(image, size_limit=size_limit, interpolation=interpolation)
-        # logger.info(f"Resized image shape_1_: {image.shape}")
-        # logger.info(f"mask image shape_0_: {mask.shape} / {type(mask)}")
-        logger.info(f'_______lama_cleaner_process_______11____')
-        mask = resize_max_size(mask, size_limit=size_limit, interpolation=interpolation)
-        # logger.info(f"mask image shape_1_: {mask.shape} / {type(mask)}")
-        logger.info(f'_______lama_cleaner_process_______12____')
-        res_np_img = lama_cleaner_model(image, mask, config)
-        logger.info(f'_______lama_cleaner_process_______13____')
-        torch.cuda.empty_cache()
-        logger.info(f'_______lama_cleaner_process_______14____')
-        image = Image.open(io.BytesIO(numpy_to_bytes(res_np_img, 'png')))
-        logger.info(f'_______lama_cleaner_process_______15____')
-    except Exception as e:
-        logger.info(f'lama_cleaner_process[Error]:' + str(e))
-        image = None
-    return  image
-class Ram_Predictor(RamPredictor):
-    def __init__(self, config, device='cpu'):
-        self.config = config
-        self.device = torch.device(device)
-        self._build_model()
-    def _build_model(self):
-        self.model = RamModel(**self.config.model).to(self.device)
-        if self.config.load_from is not None:
-            self.model.load_state_dict(torch.load(self.config.load_from, map_location=self.device))
-        self.model.train()
-def load_ram_model(device):
-    # load ram model
-    global ram_model
-    if os.environ.get('IS_MY_DEBUG') is not None:
-        return
-    model_path = "./checkpoints/ram_epoch12.pth"
-    ram_config = dict(
-        model=dict(
-            pretrained_model_name_or_path='bert-base-uncased',
-            load_pretrained_weights=False,
-            num_transformer_layer=2,
-            input_feature_size=256,
-            output_feature_size=768,
-            cls_feature_size=512,
-            num_relation_classes=56,
-            pred_type='attention',
-            loss_type='multi_label_ce',
-        ),
-        load_from=model_path,
-    )
-    ram_config = mmengine_Config(ram_config)
-    ram_model = Ram_Predictor(ram_config, device)
 # visualization
 def draw_selected_mask(mask, draw):
     color = (255, 0, 0, 153)
@@ -623,10 +420,6 @@ def get_time_cost(run_task_time, time_cost_str):
 def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold,
             iou_threshold, inpaint_mode, mask_source_radio, remove_mode, remove_mask_extend, num_relation, kosmos_input, cleaner_size_limit=1080):
-    text_prompt = getTextTrans(text_prompt, source='zh', target='en')
-    inpaint_prompt = getTextTrans(inpaint_prompt, source='zh', target='en')
     run_task_time = 0
     time_cost_str = ''
     run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
@@ -792,115 +585,6 @@ def get_model_device(module):
     except Exception as e:
         return 'Error'
-def main_gradio(args):
-    block = gr.Blocks().queue()
-    with block:
-        with gr.Row():
-            with gr.Column():
-                task_types = ["detection"]
-                if sam_enable:
-                    task_types.append("segment")
-                if inpainting_enable:
-                    task_types.append("inpainting")
-                if lama_cleaner_enable:
-                    task_types.append("remove")
-                if ram_enable:
-                    task_types.append("relate anything")
-                if kosmos_enable:
-                    task_types.append("Kosmos-2")
-                input_image = gr.Image(source='upload', elem_id="image_upload", tool='sketch', type='pil', label="Upload",
-                                    height=512, brush_color='#00FFFF', mask_opacity=0.6)
-                task_type = gr.Radio(task_types,  value="detection",
-                                                label='Task type', visible=True)
-                mask_source_radio = gr.Radio([mask_source_draw, mask_source_segment],
-                                    value=mask_source_segment, label="Mask from",
-                                    visible=False)
-                text_prompt = gr.Textbox(label="Detection Prompt[To detect multiple objects, seperating each with '.', like this: cat . dog . chair ]", placeholder="Cannot be empty")
-                inpaint_prompt = gr.Textbox(label="Inpaint Prompt (if this is empty, then remove)", visible=False)
-                num_relation = gr.Slider(label="How many relations do you want to see", minimum=1, maximum=20, value=5, step=1, visible=False)
-                kosmos_input = gr.Radio(["Brief", "Detailed"], label="Kosmos Description Type", value="Brief", visible=False)
-                run_button = gr.Button(label="Run", visible=True)
-                with gr.Accordion("Advanced options", open=False) as advanced_options:
-                    box_threshold = gr.Slider(
-                        label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.001
-                    )
-                    text_threshold = gr.Slider(
-                        label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
-                    )
-                    iou_threshold = gr.Slider(
-                        label="IOU Threshold", minimum=0.0, maximum=1.0, value=0.8, step=0.001
-                    )
-                    inpaint_mode = gr.Radio(["merge", "first"], value="merge", label="inpaint_mode")
-                    with gr.Row():
-                        with gr.Column(scale=1):
-                            remove_mode = gr.Radio(["segment", "rectangle"],  value="segment", label='remove mode')
-                        with gr.Column(scale=1):
-                            remove_mask_extend = gr.Textbox(label="remove_mask_extend", value='10')
-            with gr.Column():
-                image_gallery = gr.Gallery(label="result images", show_label=True, elem_id="gallery", height=512, visible=True
-                    ).style(preview=True, columns=[5], object_fit="scale-down", height="auto")
-                time_cost = gr.Textbox(label="Time cost by step (ms):", visible=False, interactive=False)
-                kosmos_output = gr.Image(type="pil", label="result images", visible=False)
-                kosmos_text_output = gr.HighlightedText(
-                                    label="Generated Description",
-                                    combine_adjacent=False,
-                                    show_legend=True,
-                                    visible=False,
-                                ).style(color_map=color_map)
-                # record which text span (label) is selected
-                selected = gr.Number(-1, show_label=False, placeholder="Selected", visible=False)
-                # record the current `entities`
-                entity_output = gr.Textbox(visible=False)
-                # get the current selected span label
-                def get_text_span_label(evt: gr.SelectData):
-                    if evt.value[-1] is None:
-                        return -1
-                    return int(evt.value[-1])
-                # and set this information to `selected`
-                kosmos_text_output.select(get_text_span_label, None, selected)
-                # update output image when we change the span (enity) selection
-                def update_output_image(img_input, image_output, entities, idx):
-                    entities = ast.literal_eval(entities)
-                    updated_image = draw_entity_boxes_on_image(img_input, entities, entity_index=idx)
-                    return updated_image
-                selected.change(update_output_image, [kosmos_output, kosmos_output, entity_output, selected], [kosmos_output])
-            run_button.click(fn=run_anything_task, inputs=[
-                            input_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold,
-                            iou_threshold, inpaint_mode, mask_source_radio, remove_mode, remove_mask_extend, num_relation, kosmos_input],
-                            outputs=[image_gallery, image_gallery, time_cost, time_cost, kosmos_output, kosmos_text_output, entity_output], show_progress=True, queue=True)
-            mask_source_radio.change(fn=change_radio_display, inputs=[task_type, mask_source_radio],
-                            outputs=[text_prompt, inpaint_prompt, mask_source_radio, num_relation])
-            task_type.change(fn=change_radio_display, inputs=[task_type, mask_source_radio],
-                            outputs=[text_prompt, inpaint_prompt, mask_source_radio, num_relation,
-                            image_gallery, kosmos_input, kosmos_output, kosmos_text_output
-                            ])
-        DESCRIPTION = f'### This demo from [Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything). <br>'
-        if lama_cleaner_enable:
-            DESCRIPTION += f'Remove(cleaner) from [lama-cleaner](https://github.com/Sanster/lama-cleaner). <br>'
-        if kosmos_enable:
-            DESCRIPTION += f'Kosmos-2 from [Kosmos-2](https://github.com/microsoft/unilm/tree/master/kosmos-2). <br>'
-        if ram_enable:
-            DESCRIPTION += f'RAM from [RelateAnything](https://github.com/Luodian/RelateAnything). <br>'
-        DESCRIPTION += f'Thanks for their excellent work.'
-        DESCRIPTION += f'<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. \
-                        <a href="https://huggingface.co/spaces/yizhangliu/Grounded-Segment-Anything?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>'
-        gr.Markdown(DESCRIPTION)
-    print(f'device = {device}')
-    print(f'torch.cuda.is_available = {torch.cuda.is_available()}')
-    computer_info()
-    block.launch(server_name='0.0.0.0', server_port=args.port, debug=args.debug, share=args.share)
 import signal
 import json
@@ -908,122 +592,13 @@ from datetime import date, datetime, timedelta
 from gevent import pywsgi
 import base64
-def imgFile_to_base64(image_file):
-    with open(image_file, "rb") as f:
-        im_bytes = f.read()
-    im_b64_encode = base64.b64encode(im_bytes)
-    im_b64 = im_b64_encode.decode("utf8")
-    return im_b64
-def base64_to_bytes(im_b64):
-    im_b64_encode = im_b64.encode("utf-8")
-    im_bytes = base64.b64decode(im_b64_encode)
-    return im_bytes
-def base64_to_PILImage(im_b64):
-    im_bytes = base64_to_bytes(im_b64)
-    pil_img = Image.open(io.BytesIO(im_bytes))
-    return pil_img
-class API_Starter:
-    def __init__(self):
-        from flask import Flask, request, jsonify, make_response
-        from flask_cors import CORS, cross_origin
-        import logging
-        app = Flask(__name__)
-        app.logger.setLevel(logging.ERROR)
-        CORS(app, supports_credentials=True, resources={r"/*": {"origins": "*"}})
-        @app.route('/imgCLeaner', methods=['GET', 'POST'])
-        @cross_origin()
-        def processAssist():
-            if request.method == 'GET':
-                ret_json = {'code': -1, 'reason':'no support to get'}
-            elif request.method == 'POST':
-                request_data = request.data.decode('utf-8')
-                data = json.loads(request_data)
-                result = self.handle_data(data)
-                if result is None:
-                    ret_json = {'code': -2, 'reason':'handle error'}
-                else:
-                    ret_json = {'code': 0, 'result':result}
-            return jsonify(ret_json)
-        self.app = app
-        now_time = datetime.now().strftime('%Y%m%d_%H%M%S')
-        logger.add(f'./logs/logger_[{args.port}]_{now_time}.log')
-        signal.signal(signal.SIGINT, self.signal_handler)
-    def handle_data(self, data):
-        im_b64 = data['img']
-        img = base64_to_PILImage(im_b64)
-        remove_texts = data['remove_texts']
-        remove_mask_extend = data['mask_extend']
-        results = run_anything_task(input_image = img,
-                            text_prompt = f"{remove_texts}",
-                            task_type = 'remove',
-                            inpaint_prompt = '',
-                            box_threshold = 0.3,
-                            text_threshold = 0.25,
-                            iou_threshold = 0.8,
-                            inpaint_mode = "merge",
-                            mask_source_radio = "type what to detect below",
-                            remove_mode = "rectangle",   # ["segment", "rectangle"]
-                            remove_mask_extend = f"{remove_mask_extend}",
-                            num_relation = 5,
-                            kosmos_input = None,
-                            cleaner_size_limit = -1,
-                            )
-        output_images = results[0]
-        if output_images is None:
-            return None
-        ret_json_images = []
-        file_temp = int(time.time())
-        count = 0
-        output_images = output_images[-1:]
-        for image_pil in output_images:
-            try:
-                img_format = image_pil.format.lower()
-            except Exception as e:
-                img_format = 'png'
-            image_path = os.path.join(output_dir, f"api_images_{file_temp}_{count}.{img_format}")
-            count += 1
-            try:
-                image_pil.save(image_path)
-            except Exception as e:
-                Image.fromarray(image_pil).save(image_path)
-            im_b64 = imgFile_to_base64(image_path)
-            ret_json_images.append(im_b64)
-            os.remove(image_path)
-        data = {
-            'imgs': ret_json_images,
-            }
-        return data
-    def signal_handler(self, signal, frame):
-        print('\nSignal Catched! You have just type Ctrl+C!')
-        sys.exit(0)
-    def run(self):
-        from gevent import pywsgi
-        logger.info(f'\nargs={args}\n')
-        computer_info()
-        print(f"Start a api server: http://0.0.0.0:{args.port}/imgCLeaner")
-        server = pywsgi.WSGIServer(('0.0.0.0', args.port), self.app)
-        server.serve_forever()
-device = set_device()
-groundingdino_model = load_groundingdino_model('cuda:0')
-load_sam_model("cuda:0")
-load_sd_model("cuda:0")
-load_lama_cleaner_model("cuda:0")
-# load_ram_model("cuda:0")
 def expand_white_pixels(input_pil, expand_by=1):
     # Convert the input image to grayscale
@@ -1063,6 +638,7 @@ s3 = s3_session.client(
     endpoint_url=S3_ENDPOINT_URL,
 )
 class EndpointHandler():
     def __init__(self, path=""):
         # get_nude(Image.open("girl.png"))
@@ -1105,7 +681,3 @@ class EndpointHandler():
         return {
             "filenames": filenames
         }
-print(EndpointHandler()({
-    "original_link": "https://www.shutterstock.com/image-photo/attractive-confident-young-woman-posing-600nw-2185228917.jpg"
-}))

 import warnings
 warnings.filterwarnings('ignore')
 import subprocess, io, os, sys, time
+import random
 # os.environ["XFORMERS_DISABLE_FLASH_ATTN"] = "1"
 # result = subprocess.run(['pip', 'install', 'xformers'], check=True)
 # qwen_enable = True
 # from qwen_utils import *
 # segment anything
 from segment_anything import build_sam, SamPredictor, SamAutomaticMaskGenerator
 import requests
 import torch
 from io import BytesIO
 from huggingface_hub import hf_hub_download
 config_file = 'GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py'
 ckpt_repo_id = "ShilongLiu/GroundingDINO"
 ckpt_filenmae = "groundingdino_swint_ogc.pth"
 output_dir = "outputs"
 device = 'cpu'
 sam_device = "cuda"
 def get_sam_vit_h_4b8939():
             f.write(response.content)
         print('Downloaded sam_vit_h_4b8939.pth')
 logger.info(f"initialize SAM model...")
 sam_device = "cuda"
 sd_model = None
 lama_cleaner_model= None
 ram_model = None
 kosmos_model = None
 kosmos_processor = None
+get_sam_vit_h_4b8939()
+sam_model = build_sam(checkpoint=sam_checkpoint).to(sam_device)
+sam_predictor = SamPredictor(sam_model)
+sam_mask_generator = SamAutomaticMaskGenerator(sam_model)
 def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
     args = SLConfig.fromfile(model_config_path)
     model = build_model(args)
     re_img =  1 - re_img
     return  Image.fromarray(np.uint8(255*re_img))
 # visualization
 def draw_selected_mask(mask, draw):
     color = (255, 0, 0, 153)
 def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_threshold, text_threshold,
             iou_threshold, inpaint_mode, mask_source_radio, remove_mode, remove_mask_extend, num_relation, kosmos_input, cleaner_size_limit=1080):
     run_task_time = 0
     time_cost_str = ''
     run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     except Exception as e:
         return 'Error'
 import signal
 import json
 from gevent import pywsgi
 import base64
+def get_groundingdino_model(device):
+    # initialize groundingdino model
+    logger.info(f"initialize groundingdino model...")
+    model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae, device=device)
+    return model
+groundingdino_model = get_groundingdino_model("cuda")
 def expand_white_pixels(input_pil, expand_by=1):
     # Convert the input image to grayscale
     endpoint_url=S3_ENDPOINT_URL,
 )
 class EndpointHandler():
     def __init__(self, path=""):
         # get_nude(Image.open("girl.png"))
         return {
             "filenames": filenames
         }