Spaces:
Runtime error
Runtime error
Mountchicken
committed on
Commit
•
bf9dee2
1
Parent(s):
cfa95f2
Upload 28 files
Browse files- .gitattributes +11 -0
- app.py +77 -0
- asset/demo.jpg +0 -0
- asset/demo2.jpeg +0 -0
- asset/demo3.jpeg +0 -0
- asset/demo4.jpeg +0 -0
- asset/demo5.jpeg +0 -0
- asset/demo_output.jpg +0 -0
- asset/gd1.5_overall_framework.png +0 -0
- asset/qualitative_visualization/common_object_vis.png +3 -0
- asset/qualitative_visualization/common_object_vis2.png +3 -0
- asset/qualitative_visualization/dense_object_vis.png +3 -0
- asset/qualitative_visualization/dense_object_vis2.png +3 -0
- asset/qualitative_visualization/edge_vis.png +3 -0
- asset/qualitative_visualization/long_caption_vis.png +3 -0
- asset/qualitative_visualization/long_caption_vis2.png +3 -0
- asset/qualitative_visualization/long_caption_vis3.png +3 -0
- asset/qualitative_visualization/longtail_object_vis.png +3 -0
- asset/qualitative_visualization/short_caption_vis.png +3 -0
- asset/qualitative_visualization/video_object_vis.png +3 -0
- asset/video_cover.jpg +0 -0
- asset/zeroshot.png +0 -0
- gdino/__init__.py +4 -0
- gdino/__pycache__/visualize.cpython-38.pyc +0 -0
- gdino/model_wrapper.py +132 -0
- gdino/version.py +1 -0
- gdino/visualize.py +108 -0
- requirements.txt +2 -0
- setup.py +126 -0
.gitattributes
CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
asset/qualitative_visualization/common_object_vis.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
asset/qualitative_visualization/common_object_vis2.png filter=lfs diff=lfs merge=lfs -text
|
38 |
+
asset/qualitative_visualization/dense_object_vis.png filter=lfs diff=lfs merge=lfs -text
|
39 |
+
asset/qualitative_visualization/dense_object_vis2.png filter=lfs diff=lfs merge=lfs -text
|
40 |
+
asset/qualitative_visualization/edge_vis.png filter=lfs diff=lfs merge=lfs -text
|
41 |
+
asset/qualitative_visualization/long_caption_vis.png filter=lfs diff=lfs merge=lfs -text
|
42 |
+
asset/qualitative_visualization/long_caption_vis2.png filter=lfs diff=lfs merge=lfs -text
|
43 |
+
asset/qualitative_visualization/long_caption_vis3.png filter=lfs diff=lfs merge=lfs -text
|
44 |
+
asset/qualitative_visualization/longtail_object_vis.png filter=lfs diff=lfs merge=lfs -text
|
45 |
+
asset/qualitative_visualization/short_caption_vis.png filter=lfs diff=lfs merge=lfs -text
|
46 |
+
asset/qualitative_visualization/video_object_vis.png filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import argparse
|
3 |
+
from typing import Dict, List
|
4 |
+
from gdino import GroundingDINOAPIWrapper, visualize
|
5 |
+
import gradio as gr
|
6 |
+
import numpy as np
|
7 |
+
import cv2
|
8 |
+
def arg_parse():
    """Parse the command-line options of the Gradio demo.

    Returns:
        argparse.Namespace: Parsed options. ``token`` holds the value given to
            ``--token`` and is ``None`` when the flag is omitted.
    """
    # The description previously named a different project ("T-Rex2"); this
    # script drives the Grounding DINO 1.5 API wrapper.
    parser = argparse.ArgumentParser(description="Gradio Demo for Grounding DINO 1.5")
    parser.add_argument(
        "--token",
        type=str,
        help="This token is only for gradio space. Please do not take it away for your own purpose!",
    )
    return parser.parse_args()
|
17 |
+
|
18 |
+
def resize_image_with_aspect_ratio(image: np.ndarray, min_size: int = 800, max_size: int = 1333) -> np.ndarray:
    """Resize an image so its shorter side equals ``min_size``, capping the longer side.

    The shorter side is scaled to ``min_size`` and the longer side follows the
    aspect ratio. If the longer side would exceed ``max_size`` it is set to
    ``max_size`` and the shorter side is recomputed from the aspect ratio.

    Args:
        image (np.ndarray): Image whose first two dimensions are (height, width).
        min_size (int): Target length of the shorter side. Defaults to 800.
        max_size (int): Upper bound for the longer side. Defaults to 1333.

    Returns:
        np.ndarray: The image resized with ``cv2.INTER_AREA`` interpolation.

    Raises:
        ValueError: If the image has zero height or width.
    """
    h, w = image.shape[:2]
    if h == 0 or w == 0:
        raise ValueError(f"Cannot resize an empty image of shape {image.shape}")
    aspect_ratio = w / h

    # Determine the target size based on the constraints
    if h < w:
        new_height = min_size
        new_width = int(new_height * aspect_ratio)
        if new_width > max_size:
            new_width = max_size
            new_height = int(new_width / aspect_ratio)
    else:
        new_width = min_size
        new_height = int(new_width / aspect_ratio)
        if new_height > max_size:
            new_height = max_size
            new_width = int(new_height * aspect_ratio)

    # int() truncation can produce 0 for very elongated images (e.g. 1 x 2000),
    # and cv2.resize rejects a zero-sized destination, so keep at least 1 pixel.
    new_width = max(1, new_width)
    new_height = max(1, new_height)

    return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
|
40 |
+
|
41 |
+
def inference(image, prompt: str, return_mask: bool = False, return_score: bool = False) -> gr.Image:
    """Run text-prompted detection on an image and draw the results on it.

    Args:
        image: Input image as delivered by the ``gr.Image`` component
            (presumably a numpy array; ``None`` when nothing was uploaded).
        prompt (str): Text prompt with categories separated by '.'.
        return_mask (bool): Also request and draw segmentation masks. The image
            is shrunk first in this case to save computation.
        return_score (bool): Draw the detection score next to each label.

    Returns:
        The annotated image produced by ``visualize``.

    Raises:
        gr.Error: If no image or no prompt was provided.
    """
    # Fail with a readable message instead of an opaque error from deeper in
    # the upload / API code when the user presses "Run" without input.
    if image is None:
        raise gr.Error("Please provide an input image.")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a text prompt, e.g. 'person . pigeon . tree'.")

    # shrink image first to save computation
    if return_mask:
        image = resize_image_with_aspect_ratio(image, min_size=600, max_size=1000)

    prompts = dict(image=image, prompt=prompt)
    results = gdino.inference(prompts, return_mask=return_mask)
    return visualize(image, results, return_mask=return_mask, draw_score=return_score)
|
49 |
+
|
50 |
+
args = arg_parse()
# API client shared by inference(); created once when the module is loaded.
gdino = GroundingDINOAPIWrapper(args.token)

if __name__ == "__main__":
    # (image path, text prompt) pairs offered as clickable examples in the UI.
    example_rows = [
        ['asset/demo.jpg', 'person . pigeon . tree'],
        ['asset/demo2.jpeg', 'wireless walkie-talkie . life jacket . atlantic cod . man . vehicle . accessory . cell phone .'],
        ['asset/demo3.jpeg', 'wine rack . bottle . basket'],
        ['asset/demo4.jpeg', 'Mosque. golden dome. smaller domes. minarets. arched windows. white facade. cars. electrical lines. streetlights. trees. pedestrians. blue sky. shadows'],
        ['asset/demo5.jpeg', 'stately building. columns. sculptures. Spanish flag. clouds. blue sky. street. taxis. van. city bus. traffic lights. street lamps. road markings. pedestrians. sidewalk. traffic sign. palm trees'],
    ]

    with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
        # Input and output images side by side.
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(label="Input Image")
            with gr.Column():
                output_image = gr.Image(label="Output Image")

        # Options forwarded to inference().
        with gr.Row():
            return_mask = gr.Checkbox(label="Return Mask")
            return_score = gr.Checkbox(label="Return Score")

        prompt = gr.Textbox(label="Prompt", placeholder="e.g., person.pigeon.tree")
        run = gr.Button(value="Run")

        with gr.Row():
            gr.Examples(
                examples=example_rows,
                inputs=[input_image, prompt],
            )

        run.click(
            inference,
            inputs=[input_image, prompt, return_mask, return_score],
            outputs=output_image,
        )

    demo.launch(debug=True)
|
asset/demo.jpg
ADDED
asset/demo2.jpeg
ADDED
asset/demo3.jpeg
ADDED
asset/demo4.jpeg
ADDED
asset/demo5.jpeg
ADDED
asset/demo_output.jpg
ADDED
asset/gd1.5_overall_framework.png
ADDED
asset/qualitative_visualization/common_object_vis.png
ADDED
Git LFS Details
|
asset/qualitative_visualization/common_object_vis2.png
ADDED
Git LFS Details
|
asset/qualitative_visualization/dense_object_vis.png
ADDED
Git LFS Details
|
asset/qualitative_visualization/dense_object_vis2.png
ADDED
Git LFS Details
|
asset/qualitative_visualization/edge_vis.png
ADDED
Git LFS Details
|
asset/qualitative_visualization/long_caption_vis.png
ADDED
Git LFS Details
|
asset/qualitative_visualization/long_caption_vis2.png
ADDED
Git LFS Details
|
asset/qualitative_visualization/long_caption_vis3.png
ADDED
Git LFS Details
|
asset/qualitative_visualization/longtail_object_vis.png
ADDED
Git LFS Details
|
asset/qualitative_visualization/short_caption_vis.png
ADDED
Git LFS Details
|
asset/qualitative_visualization/video_object_vis.png
ADDED
Git LFS Details
|
asset/video_cover.jpg
ADDED
asset/zeroshot.png
ADDED
gdino/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .model_wrapper import GroundingDINOAPIWrapper
|
2 |
+
from .visualize import visualize
|
3 |
+
|
4 |
+
__all__ = ["GroundingDINOAPIWrapper", "visualize"]
|
gdino/__pycache__/visualize.cpython-38.pyc
ADDED
Binary file (3.17 kB). View file
|
|
gdino/model_wrapper.py
ADDED
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tempfile
|
2 |
+
from typing import Dict, List, Union
|
3 |
+
import numpy as np
|
4 |
+
from dds_cloudapi_sdk import (
|
5 |
+
DetectionTask,
|
6 |
+
Client,
|
7 |
+
Config,
|
8 |
+
TextPrompt,
|
9 |
+
DetectionModel,
|
10 |
+
DetectionTarget,
|
11 |
+
)
|
12 |
+
from PIL import Image
|
13 |
+
import concurrent.futures
|
14 |
+
|
15 |
+
class GroundingDINOAPIWrapper:
    """API wrapper for Grounding DINO 1.5

    Args:
        token (str): The token for Grounding DINO 1.5 API. We are now opening free API access to Grounding DINO 1.5. For
            educators, students, and researchers, we offer an API with extensive usage times to
            support your educational and research endeavors. You can get free API token at here:
            https://deepdataspace.com/request_api

    """

    def __init__(self, token: str):
        self.client = Client(Config(token=token))

    def inference(self, prompt: Dict, return_mask: bool = False):
        """Main inference function of Grounding DINO 1.5.

        A single image/prompt pair is processed per call; batch inference is
        not supported for now.

        Args:
            prompt (dict): Input with the following keys:
                - "image" (str | np.ndarray): Path to image, e.g. "test1.jpg",
                    or the image itself as an array (see ``get_image_url``).
                - "prompt" (str): Text prompt separated by '.' E.g. 'cate1 . cate2 . cate3'
            return_mask (bool): Whether to return mask. Defaults to False.

        Returns:
            (Dict): Detection results in dict format with keys:
                - "scores": (List[float]): A list of scores for each object
                - "categorys": (List[str]): A list of categorys for each object
                - "boxes": (List[List[int]]): A list of boxes for each object,
                    in format [xmin, ymin, xmax, ymax]
                - "masks": (List): A list of masks for each object; empty when
                    ``return_mask`` is False
        """
        # construct input prompts
        image_url = self.get_image_url(prompt["image"])
        task = DetectionTask(
            image_url=image_url,
            prompts=[TextPrompt(text=prompt["prompt"])],
            targets=[DetectionTarget.Mask, DetectionTarget.BBox] if return_mask else [DetectionTarget.BBox],
            model=DetectionModel.GDino1_5_Pro,
        )
        self.client.run_task(task)
        return self.postprocess(task.result, task, return_mask)

    def postprocess(self, result, task, return_mask):
        """Postprocess the result from the API call

        Args:
            result (TaskResult): Task result with the following keys:
                - objects (List[DetectionObject]): Each DetectionObject has the following keys:
                    - bbox (List[float]): Box in xyxy format
                    - category (str): Detection category
                    - score (float): Detection score
                    - mask (DetectionObjectMask): Use mask.counts to parse RLE mask
            task (DetectionTask): The task object; its ``rle2rgba`` method is
                used to decode masks when ``return_mask`` is True
            return_mask (bool): Whether to return mask

        Returns:
            (Dict): Return dict in format:
                {
                    "scores": (List[float]): A list of scores for each object
                    "categorys": (List[str]): A list of categorys for each object
                    "boxes": (List[List[int]]): A list of boxes for each object
                    "masks": (List[PIL.Image]): A list of masks in the format of PIL.Image
                }
            Entries follow the order of ``result.objects``.
        """
        def process_detection(detection):
            # Decode the mask only on request; everything else is a plain read.
            mask = task.rle2rgba(detection.mask) if return_mask else None
            return detection.bbox, detection.score, detection.category, mask

        boxes, scores, categorys, masks = [], [], [], []
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # executor.map yields results in submission order, so the output
            # lists line up with result.objects on every call (as_completed
            # returned them in completion order, i.e. nondeterministically).
            for box, score, category, mask in executor.map(process_detection, result.objects):
                boxes.append(box)
                scores.append(score)
                categorys.append(category)
                if mask is not None:
                    masks.append(mask)

        return dict(boxes=boxes, categorys=categorys, scores=scores, masks=masks)

    def get_image_url(self, image: Union[str, np.ndarray]):
        """Upload Image to server and return the url

        Args:
            image (Union[str, np.ndarray]): The image to upload. Can be a file path or np.ndarray.
                If it is a np.ndarray, it will be saved to a temporary file.

        Returns:
            str: The url of the image
        """
        import os

        if isinstance(image, str):
            return self.client.upload_file(image)

        # Write the PNG to a path inside a temporary directory and upload that
        # path. This avoids re-opening a still-open NamedTemporaryFile by name
        # (not possible on Windows) and guarantees the file is fully written
        # and closed before the upload reads it.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_file_path = os.path.join(tmp_dir, "image.png")
            # image is in numpy format, convert to PIL Image
            Image.fromarray(image).save(tmp_file_path, format="PNG")
            return self.client.upload_file(tmp_file_path)
|
gdino/version.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Package version string; write_version_file() in setup.py rewrites this file in the same format.
__version__ = 'v1.5'
|
gdino/visualize.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
from PIL import Image, ImageDraw, ImageFont, ImageOps
|
5 |
+
import random
|
6 |
+
|
7 |
+
|
8 |
+
def draw_mask(mask, draw, random_color=True):
    """Paint every non-zero pixel of a mask through ``draw`` in a single color.

    Args:
        mask (np.array): Mask array; each non-zero entry is painted.
        draw (ImageDraw.Draw): Drawing object whose ``point`` method receives
            the pixel coordinates.
        random_color (bool): Pick random RGB components when True, otherwise
            use the fixed color (30, 144, 255). The alpha component is 153 in
            both cases.
    """
    alpha = 153
    if not random_color:
        color = (30, 144, 255, alpha)
    else:
        red, green, blue = (random.randint(0, 255) for _ in range(3))
        color = (red, green, blue, alpha)

    # argwhere lists the indices of the non-zero entries in array index order;
    # reversing each one hands the coordinates to point() in the opposite order.
    for index in np.argwhere(mask):
        draw.point(index[::-1], fill=color)
|
30 |
+
|
31 |
+
def visualize(image_pil: "Image.Image",
              result: Dict,
              draw_width: float = 6.0,
              return_mask=True,
              draw_score=True) -> "Image.Image":
    """Plot bounding boxes, labels and (optionally) masks on an image.

    Args:
        image_pil (PIL.Image | np.ndarray): The input image. A numpy array is
            converted to a PIL Image first; a PIL Image is drawn on in place.
        result (Dict): The detection results. The keys are:
            - boxes (List[int]): A list of bounding boxes in shape (N, 4), [x1, y1, x2, y2] format.
            - scores (List[float]): A list of scores for each bounding box. shape (N)
            - categorys (List[str]): A list of categorys for each object
            - masks (List[PIL.Image]): A list of masks in the format of PIL.Image (optional)
        draw_width (float): Outline width of the boxes. Defaults to 6.0.
        return_mask (bool): Overlay the masks if any are present. Defaults to True.
        draw_score (bool): Draw score on the image. Defaults to True.

    Returns:
        PIL.Image: The input image with plotted bounding boxes, labels, and masks.
    """
    # Get the bounding boxes and labels from the result dictionary
    boxes = result["boxes"]
    scores = result["scores"]
    categorys = result["categorys"]
    masks = result.get("masks", [])

    # One random color per unique category
    cate2color = {
        cate: tuple(np.random.randint(0, 255, size=3).tolist())
        for cate in set(categorys)
    }

    # Create a PIL ImageDraw object to draw on the input image
    if isinstance(image_pil, np.ndarray):
        image_pil = Image.fromarray(image_pil)
    draw = ImageDraw.Draw(image_pil)

    # The label font does not change between boxes, so load it once.
    font = ImageFont.load_default()

    # Draw the box and its label for each detection
    for box, score, category in zip(boxes, scores, categorys):
        x0, y0, x1, y1 = (int(v) for v in box)
        color = cate2color[category]

        # Draw the box outline on the input image
        draw.rectangle([x0, y0, x1, y1], outline=color, width=int(draw_width))

        # Label text, with the score when requested
        text = f"{category} {score:.2f}" if draw_score else f"{category}"

        # Measure the label background; fonts without getbbox fall back to
        # textsize (presumably for older Pillow releases - TODO confirm).
        if hasattr(font, "getbbox"):
            bbox = draw.textbbox((x0, y0), text, font)
        else:
            w, h = draw.textsize(text, font)
            bbox = (x0, y0, w + x0, y0 + h)
        draw.rectangle(bbox, fill=color)
        draw.text((x0, y0), text, fill="white", font=font)

    # Draw the masks on the input image if masks are provided
    if len(masks) > 0 and return_mask:
        mask_image = Image.new("RGBA", image_pil.size, color=(0, 0, 0, 0))
        mask_draw = ImageDraw.Draw(mask_image)
        for mask in masks:
            # The last channel of each mask selects the pixels to paint.
            draw_mask(np.array(mask)[:, :, -1], mask_draw)

        image_pil = Image.alpha_composite(image_pil.convert("RGBA"), mask_image).convert("RGB")
    return image_pil
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
dds-cloudapi-sdk==0.2.1
|
2 |
+
gradio==4.22.0
|
setup.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import glob
|
2 |
+
import os
|
3 |
+
import subprocess
|
4 |
+
|
5 |
+
import torch
|
6 |
+
from setuptools import find_packages, setup
|
7 |
+
from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension
|
8 |
+
|
9 |
+
version = "v1.5"
package_name = "gdino"
# Directory containing this setup.py; used as the git working directory and
# as the base path of the generated version file.
cwd = os.path.dirname(os.path.abspath(__file__))

# Best-effort lookup of the current git commit. "Unknown" is kept when git is
# missing, the directory is not a repository, or the output cannot be decoded.
sha = "Unknown"
try:
    sha = subprocess.check_output(
        ["git", "rev-parse", "HEAD"],
        cwd=cwd,
        stderr=subprocess.DEVNULL,  # keep "not a git repository" noise out of the build log
    ).decode("ascii").strip()
except (OSError, subprocess.SubprocessError, UnicodeError):
    pass
|
19 |
+
|
20 |
+
|
21 |
+
def write_version_file():
    """Write ``gdino/version.py`` (next to this script) with the package version.

    The file receives a single ``__version__ = '<version>'`` line built from
    the module-level ``version``.
    """
    target = os.path.join(cwd, "gdino/", "version.py")
    content = f"__version__ = '{version}'\n"
    with open(target, "w") as handle:
        handle.write(content)
        # handle.write(f"git_version = {repr(sha)}\n")
|
26 |
+
|
27 |
+
|
28 |
+
def parse_requirements(fname="requirements.txt", with_version=True):
    """Parse the package dependencies listed in a requirements file.

    Blank lines and lines starting with ``#`` are skipped. ``-r <file>`` lines
    are followed recursively, ``-e ...#egg=<name>`` lines contribute ``<name>``,
    and lines containing ``@git+`` are kept verbatim. For every other line the
    environment marker (text after ``;``) is separated first, then the package
    name is split from its version specifier.

    Args:
        fname (str): path to requirements file
        with_version (bool, default=True): if True include version specs,
            otherwise strip them and return bare package names

    Returns:
        List[str]: list of requirements items; empty if ``fname`` does not exist

    CommandLine:
        python -c "import setup; print(setup.parse_requirements())"
    """
    import re
    import sys
    from os.path import exists

    require_fpath = fname

    # Longer operators come first so the alternation never stops at a prefix
    # (e.g. "==" inside "===").
    version_ops = ("===", "==", ">=", "<=", "~=", "!=", ">", "<")
    version_pat = re.compile("(" + "|".join(re.escape(op) for op in version_ops) + ")")

    def parse_line(line):
        """Parse information from a line in a requirements text file."""
        if line.startswith("-r "):
            # Allow specifying requirements in other files
            target = line.split(" ")[1]
            yield from parse_require_file(target)
            return

        info = {"line": line}
        if line.startswith("-e "):
            info["package"] = line.split("#egg=")[1]
        elif "@git+" in line:
            info["package"] = line
        else:
            # Split off platform specific dependencies before looking for a
            # version operator, so comparison operators inside the marker
            # (e.g. python_version < '3.9') are not mistaken for one.
            # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
            requirement, _, marker = line.partition(";")

            # Separate the package name from its version specifier
            parts = [p.strip() for p in version_pat.split(requirement, maxsplit=1)]
            info["package"] = parts[0]
            if len(parts) > 1:
                op, spec = parts[1:]
                info["version"] = (op, spec)

            marker = marker.strip()
            if marker:
                info["platform_deps"] = marker
        yield info

    def parse_require_file(fpath):
        with open(fpath, "r") as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith("#"):
                    yield from parse_line(line)

    def gen_packages_items():
        if not exists(require_fpath):
            return
        for info in parse_require_file(require_fpath):
            parts = [info["package"]]
            if with_version and "version" in info:
                parts.extend(info["version"])
            if not sys.version.startswith("3.4"):
                # apparently package_deps are broken in 3.4
                platform_deps = info.get("platform_deps")
                if platform_deps is not None:
                    parts.append(";" + platform_deps)
            yield "".join(parts)

    return list(gen_packages_items())
|
105 |
+
|
106 |
+
|
107 |
+
if __name__ == "__main__":
    print(f"Building wheel {package_name}-{version}")

    # The full LICENSE text (read relative to the current working directory)
    # is passed to setup() as the license field.
    with open("LICENSE", "r", encoding="utf-8") as license_file:
        license_text = license_file.read()

    # Regenerate gdino/version.py before packaging.
    write_version_file()

    setup_kwargs = dict(
        name="gdino",
        version="v1.5",
        author="International Digital Economy Academy, CVR",
        url="https://github.com/IDEA-Research/Grounding-DINO-1.5-API",
        description="Grounding DINO 1.5 API wrapper.",
        license=license_text,
        install_requires=parse_requirements("requirements.txt"),
        packages=find_packages(exclude=("tests", )),
        ext_modules=None,
        cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
    )
    setup(**setup_kwargs)
|