mart9992 committed on
Commit
9856e13
1 Parent(s): b793f0c
GroundingDINO/groundingdino/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (149 Bytes).

GroundingDINO/groundingdino/datasets/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (158 Bytes).

GroundingDINO/groundingdino/datasets/__pycache__/transforms.cpython-310.pyc ADDED
Binary file (10.1 kB).

__pycache__/grounded_sam_demo.cpython-310.pyc ADDED
Binary file (3.58 kB).

__pycache__/handler.cpython-310.pyc ADDED
Binary file (1.88 kB).

__pycache__/test.cpython-310.pyc ADDED
Binary file (1.73 kB).
 
grounded_sam_demo.py CHANGED
@@ -1,4 +1,5 @@
-import argparse
+from GroundingDINO.groundingdino.datasets.transforms import Compose, RandomResize, ToTensor, Normalize
+from io import BytesIO
 import os
 import copy
 
@@ -16,8 +17,8 @@ from GroundingDINO.groundingdino.util.utils import clean_state_dict, get_phrases
 
 # segment anything
 from segment_anything import (
-    sam_model_registry,
-    sam_hq_model_registry,
+    build_sam,
+    build_sam_hq,
     SamPredictor
 )
 import cv2
@@ -25,27 +26,13 @@ import numpy as np
 import matplotlib.pyplot as plt
 
 
-def load_image(image_path):
-    # load image
-    image_pil = Image.open(image_path).convert("RGB")  # load image
-
-    transform = T.Compose(
-        [
-            T.RandomResize([800], max_size=1333),
-            T.ToTensor(),
-            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
-        ]
-    )
-    image, _ = transform(image_pil, None)  # 3, h, w
-    return image_pil, image
-
-
 def load_model(model_config_path, model_checkpoint_path, device):
     args = SLConfig.fromfile(model_config_path)
     args.device = device
     model = build_model(args)
     checkpoint = torch.load(model_checkpoint_path, map_location="cpu")
-    load_res = model.load_state_dict(clean_state_dict(checkpoint["model"]), strict=False)
+    load_res = model.load_state_dict(
+        clean_state_dict(checkpoint["model"]), strict=False)
     print(load_res)
     _ = model.eval()
     return model
@@ -72,136 +59,38 @@ def get_grounding_output(model, image, caption, box_threshold, text_threshold, w
     boxes_filt = boxes_filt[filt_mask]  # num_filt, 4
     logits_filt.shape[0]
 
-    # get phrase
-    tokenlizer = model.tokenizer
-    tokenized = tokenlizer(caption)
-    # build pred
-    pred_phrases = []
-    for logit, box in zip(logits_filt, boxes_filt):
-        pred_phrase = get_phrases_from_posmap(logit > text_threshold, tokenized, tokenlizer)
-        if with_logits:
-            pred_phrases.append(pred_phrase + f"({str(logit.max().item())[:4]})")
-        else:
-            pred_phrases.append(pred_phrase)
-
-    return boxes_filt, pred_phrases
-
-def show_mask(mask, ax, random_color=False):
-    if random_color:
-        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
-    else:
-        color = np.array([30/255, 144/255, 255/255, 0.6])
-    h, w = mask.shape[-2:]
-    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
-    ax.imshow(mask_image)
-
-
-def show_box(box, ax, label):
-    x0, y0 = box[0], box[1]
-    w, h = box[2] - box[0], box[3] - box[1]
-    ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2))
-    ax.text(x0, y0, label)
-
-
-def save_mask_data(output_dir, mask_list, box_list, label_list):
-    value = 0  # 0 for background
+    return boxes_filt
 
-    mask_img = torch.zeros(mask_list.shape[-2:])
-    for idx, mask in enumerate(mask_list):
-        mask_img[mask.cpu().numpy()[0] == True] = value + idx + 1
-    plt.figure(figsize=(10, 10))
-    plt.imshow(mask_img.numpy())
-    plt.axis('off')
-    plt.savefig(os.path.join(output_dir, 'mask.jpg'), bbox_inches="tight", dpi=300, pad_inches=0.0)
-
-    json_data = [{
-        'value': value,
-        'label': 'background'
-    }]
-    for label, box in zip(label_list, box_list):
-        value += 1
-        name, logit = label.split('(')
-        logit = logit[:-1]  # the last is ')'
-        json_data.append({
-            'value': value,
-            'label': name,
-            'logit': float(logit),
-            'box': box.numpy().tolist(),
-        })
-    with open(os.path.join(output_dir, 'mask.json'), 'w') as f:
-        json.dump(json_data, f)
-
-
-if __name__ == "__main__":
-
-    parser = argparse.ArgumentParser("Grounded-Segment-Anything Demo", add_help=True)
-    parser.add_argument("--config", type=str, required=True, help="path to config file")
-    parser.add_argument(
-        "--grounded_checkpoint", type=str, required=True, help="path to checkpoint file"
-    )
-    parser.add_argument(
-        "--sam_version", type=str, default="vit_h", required=False, help="SAM ViT version: vit_b / vit_l / vit_h"
-    )
-    parser.add_argument(
-        "--sam_checkpoint", type=str, required=False, help="path to sam checkpoint file"
-    )
-    parser.add_argument(
-        "--sam_hq_checkpoint", type=str, default=None, help="path to sam-hq checkpoint file"
-    )
-    parser.add_argument(
-        "--use_sam_hq", action="store_true", help="using sam-hq for prediction"
-    )
-    parser.add_argument("--input_image", type=str, required=True, help="path to image file")
-    parser.add_argument("--text_prompt", type=str, required=True, help="text prompt")
-    parser.add_argument(
-        "--output_dir", "-o", type=str, default="outputs", required=True, help="output directory"
-    )
+
+def grounded_sam_demo(input_pil, config_file, grounded_checkpoint, sam_checkpoint,
+                      text_prompt, box_threshold=0.3, text_threshold=0.25,
+                      device="cuda"):
 
-    parser.add_argument("--box_threshold", type=float, default=0.3, help="box threshold")
-    parser.add_argument("--text_threshold", type=float, default=0.25, help="text threshold")
-
-    parser.add_argument("--device", type=str, default="cpu", help="running on cpu only!, default=False")
-    args = parser.parse_args()
-
-    # cfg
-    config_file = args.config  # change the path of the model config file
-    grounded_checkpoint = args.grounded_checkpoint  # change the path of the model
-    sam_version = args.sam_version
-    sam_checkpoint = args.sam_checkpoint
-    sam_hq_checkpoint = args.sam_hq_checkpoint
-    use_sam_hq = args.use_sam_hq
-    image_path = args.input_image
-    text_prompt = args.text_prompt
-    output_dir = args.output_dir
-    box_threshold = args.box_threshold
-    text_threshold = args.text_threshold
-    device = args.device
-
-    # make dir
-    os.makedirs(output_dir, exist_ok=True)
-    # load image
-    image_pil, image = load_image(image_path)
-    # load model
-    model = load_model(config_file, grounded_checkpoint, device=device)
+    # Convert PIL image to tensor with normalization
+    transform = Compose([
+        RandomResize([800], max_size=1333),
+        ToTensor(),
+        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+    ])
+
+    if input_pil.mode != "RGB":
+        input_pil = input_pil.convert("RGB")
+
+    image, _ = transform(input_pil, None)
+
+    # Load model
+    model = load_model(config_file, grounded_checkpoint, device=device)
 
-    # visualize raw image
-    image_pil.save(os.path.join(output_dir, "raw_image.jpg"))
-
-    # run grounding dino model
-    boxes_filt, pred_phrases = get_grounding_output(
-        model, image, text_prompt, box_threshold, text_threshold, device=device
-    )
-
-    # initialize SAM
-    if use_sam_hq:
-        predictor = SamPredictor(sam_hq_model_registry[sam_version](checkpoint=sam_hq_checkpoint).to(device))
-    else:
-        predictor = SamPredictor(sam_model_registry[sam_version](checkpoint=sam_checkpoint).to(device))
-    image = cv2.imread(image_path)
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    # Get grounding dino model output
+    boxes_filt = get_grounding_output(
+        model, image, text_prompt, box_threshold, text_threshold, device=device)
+
+    # Initialize SAM
+    predictor = SamPredictor(build_sam(checkpoint=sam_checkpoint).to(device))
+    image = cv2.cvtColor(np.array(input_pil), cv2.COLOR_RGB2BGR)
     predictor.set_image(image)
 
-    size = image_pil.size
+    size = input_pil.size
     H, W = size[1], size[0]
     for i in range(boxes_filt.size(0)):
         boxes_filt[i] = boxes_filt[i] * torch.Tensor([W, H, W, H])
@@ -209,27 +98,30 @@ if __name__ == "__main__":
         boxes_filt[i][2:] += boxes_filt[i][:2]
 
     boxes_filt = boxes_filt.cpu()
-    transformed_boxes = predictor.transform.apply_boxes_torch(boxes_filt, image.shape[:2]).to(device)
+    transformed_boxes = predictor.transform.apply_boxes_torch(
+        boxes_filt, image.shape[:2]).to(device)
 
     masks, _, _ = predictor.predict_torch(
-        point_coords = None,
-        point_labels = None,
-        boxes = transformed_boxes.to(device),
-        multimask_output = False,
+        point_coords=None,
+        point_labels=None,
+        boxes=transformed_boxes.to(device),
+        multimask_output=False,
     )
 
-    # draw output image
-    plt.figure(figsize=(10, 10))
-    plt.imshow(image)
-    for mask in masks:
-        show_mask(mask.cpu().numpy(), plt.gca(), random_color=True)
-    for box, label in zip(boxes_filt, pred_phrases):
-        show_box(box.numpy(), plt.gca(), label)
+    # Create mask image
+    value = 0  # 0 for background
+    mask_img = torch.zeros(masks.shape[-2:])
+    for idx, mask in enumerate(masks):
+        mask_img[mask.cpu().numpy()[0] == True] = value + idx + 1
 
+    fig = plt.figure(figsize=(10, 10))
+    plt.imshow(mask_img.numpy())
     plt.axis('off')
-    plt.savefig(
-        os.path.join(output_dir, "grounded_sam_output.jpg"),
-        bbox_inches="tight", dpi=300, pad_inches=0.0
-    )
 
-    save_mask_data(output_dir, masks, boxes_filt, pred_phrases)
+    buf = BytesIO()
+    plt.savefig(buf, format='png', bbox_inches="tight",
+                dpi=300, pad_inches=0.0)
+    buf.seek(0)
+    out_pil = Image.open(buf)
+
+    return out_pil
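Note: the net effect of this change is that grounded_sam_demo.py no longer defines a CLI entry point; it now exposes an importable grounded_sam_demo() function that takes a PIL image plus config/checkpoint paths and returns the rendered mask figure as a PIL image. A minimal usage sketch, assuming the checkpoint paths hard-coded in test.py below and the sample image used by handler.py:

from PIL import Image
from grounded_sam_demo import grounded_sam_demo

# Paths mirror the ones hard-coded in test.py in this commit.
config_file = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
grounded_checkpoint = "groundingdino_swint_ogc.pth"
sam_checkpoint = "sam_hq_vit_h.pth"

input_pil = Image.open("assets/demo1.jpg")

# Returns the matplotlib-rendered instance-mask figure as a PIL image.
mask_pil = grounded_sam_demo(
    input_pil, config_file, grounded_checkpoint, sam_checkpoint,
    text_prompt="bear", box_threshold=0.3, text_threshold=0.25,
    device="cuda",
)
mask_pil.save("mask.png")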
handler.py ADDED
@@ -0,0 +1,58 @@
+import os
+import subprocess
+import torch
+from test import just_get_sd_mask
+import requests
+from PIL import Image
+from io import BytesIO
+
+print(os.listdir('/usr/local/'))
+print(torch.version.cuda)
+
+class EndpointHandler():
+    def __init__(self, path="."):
+        is_production = True
+
+        if False:
+            return
+
+        os.chdir(path)
+
+        os.environ['AM_I_DOCKER'] = 'False'
+        os.environ['BUILD_WITH_CUDA'] = 'True'
+        os.environ['CUDA_HOME'] = '/usr/local/cuda-11.7/' if is_production else '/usr/local/cuda-12.1/'
+
+        # Install Segment Anything
+        subprocess.run(["python", "-m", "pip", "install", "-e", "segment_anything"])
+
+        # Install Grounding DINO
+        subprocess.run(["python", "-m", "pip", "install", "-e", "GroundingDINO"])
+
+        # Install diffusers
+        subprocess.run(["pip", "install", "--upgrade", "diffusers[torch]"])
+
+        # Install osx
+        subprocess.run(["git", "submodule", "update", "--init", "--recursive"])
+        subprocess.run(["bash", "grounded-sam-osx/install.sh"], cwd="grounded-sam-osx")
+
+        # Install RAM & Tag2Text
+        subprocess.run(["git", "clone", "https://github.com/xinyu1205/recognize-anything.git"])
+        subprocess.run(["pip", "install", "-r", "./recognize-anything/requirements.txt"])
+        subprocess.run(["pip", "install", "-e", "./recognize-anything/"])
+
+    def __call__(self, data):
+        mask_pil = just_get_sd_mask(Image.open("assets/demo1.jpg"), "bear", 10)
+
+        if mask_pil.mode != 'RGB':
+            mask_pil = mask_pil.convert('RGB')
+
+        # Convert PIL image to byte array
+        img_byte_arr = BytesIO()
+        mask_pil.save(img_byte_arr, format='JPEG')
+        img_byte_arr = img_byte_arr.getvalue()
+
+        # Upload to file.io
+        response = requests.post("https://file.io/", files={"file": img_byte_arr})
+        url = response.json().get('link')
+
+        return {"url": url}
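Note: handler.py follows the custom-handler convention used by Hugging Face Inference Endpoints, where EndpointHandler.__call__(data) is invoked once per request. As committed, __call__ ignores data entirely: it always masks assets/demo1.jpg with the prompt "bear" and returns a file.io link. A hedged sketch of calling such an endpoint once deployed; the endpoint URL and token below are placeholders, not values from this repository:

import requests

ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"  # placeholder
HF_TOKEN = "hf_..."  # placeholder access token

response = requests.post(
    ENDPOINT_URL,
    headers={
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    },
    # The payload is currently ignored by the handler, but JSON is still expected.
    json={"inputs": "bear"},
)
print(response.json())  # e.g. {"url": "https://file.io/..."}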
handler_test.py ADDED
@@ -0,0 +1,13 @@
+from handler import EndpointHandler
+
+# init handler
+my_handler = EndpointHandler(path=".")
+
+# prepare sample payload
+non_holiday_payload = {"inputs": "I am quite excited how this will turn out", "date": "2022-08-08"}
+
+# test the handler
+non_holiday_pred=my_handler(non_holiday_payload)
+
+# show results
+print("non_holiday_pred", non_holiday_pred)
test.py ADDED
@@ -0,0 +1,57 @@
+from grounded_sam_demo import grounded_sam_demo
+import numpy as np
+from PIL import Image
+from scipy.ndimage import convolve
+from scipy.ndimage import binary_dilation
+
+
+def get_sd_mask(color_mask_pil, target=(72, 4, 84), tolerance=50):
+    image_array = np.array(color_mask_pil)
+
+    # Update target based on the number of color channels in the image array
+    target = np.array(list(target) + [255] *
+                      (image_array.shape[-1] - len(target)))
+
+    mask = np.abs(image_array - target) <= tolerance
+    mask = np.all(mask, axis=-1)
+
+    new_image_array = np.ones_like(image_array) * 255  # Start with white
+    # Apply black where condition met
+    new_image_array[mask] = [0] * image_array.shape[-1]
+
+    return Image.fromarray(new_image_array)
+
+
+def expand_white_pixels(input_pil, expand_by=1):
+    img_array = np.array(input_pil)
+    is_white = np.all(img_array == 255, axis=-1)
+
+    kernel = np.ones((2*expand_by+1, 2*expand_by+1), bool)
+    expanded_white = binary_dilation(is_white, structure=kernel)
+
+    expanded_array = np.where(expanded_white[..., None], 255, img_array)
+
+    expanded_pil = Image.fromarray(expanded_array.astype('uint8'))
+    return expanded_pil
+
+
+config_file = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
+grounded_checkpoint = "groundingdino_swint_ogc.pth"
+sam_checkpoint = "sam_hq_vit_h.pth"
+
+
+def just_get_sd_mask(input_pil, text_prompt, padding):
+    print("Doing sam")
+
+    colored_mask_pil = grounded_sam_demo(
+        input_pil, config_file, grounded_checkpoint, sam_checkpoint, text_prompt)
+
+    print("doing to white")
+
+    sd_mask_pil = get_sd_mask(colored_mask_pil)
+
+    print("expanding white pixels")
+
+    sd_mask_withpadding_pil = expand_white_pixels(sd_mask_pil, padding)
+
+    return sd_mask_withpadding_pil
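Note: the padding step in expand_white_pixels is a plain binary dilation of the white mask region with a square kernel of side 2*expand_by+1. A small self-contained sketch of the same logic on a synthetic mask (no checkpoints required), mirroring the code above:

import numpy as np
from scipy.ndimage import binary_dilation

# 7x7 black image with a single white pixel at the centre.
img = np.zeros((7, 7, 3), dtype=np.uint8)
img[3, 3] = 255

expand_by = 1
is_white = np.all(img == 255, axis=-1)
kernel = np.ones((2 * expand_by + 1, 2 * expand_by + 1), bool)
expanded_white = binary_dilation(is_white, structure=kernel)
expanded = np.where(expanded_white[..., None], 255, img)

# The single white pixel grows into a 3x3 white block.
print(expanded_white.sum())      # 9
print(expanded[2:5, 2:5].min())  # 255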