# Author: ttengwang
# Commit ccb14a3: support "segment everything in a paragraph"
import json
import os
import random
from typing import List
from typing import Union

import cv2
import numpy as np
def draw_bbox(img: Union[np.ndarray, str], save_name: str, bbox: List[dict], show_caption: bool = False):
    """Draw bounding boxes (and optionally their captions) on an image and save it.

    Args:
        img: Image array, or a path that is loaded with ``cv2.imread``. An
            array is drawn on in place; pass a copy to keep the original clean.
        save_name: Output path handed to ``cv2.imwrite``.
        bbox: ``[{'bbox': [x1, y1, x2, y2], 'caption': str, ...}, ...]``.
            ``'caption'`` is only read when ``show_caption`` is true. The
            dicts are left unmodified.
        show_caption: When true, write each caption inside the top-left corner
            of its box.

    Raises:
        FileNotFoundError: If ``img`` is a path that ``cv2.imread`` cannot load.
    """
    if isinstance(img, str):
        img_path = img
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(f'could not read image: {img_path}')

    channel_levels = [0, 50, 100, 150, 200, 250]
    font = cv2.FONT_HERSHEY_SIMPLEX
    for box in bbox:
        # Convert into locals; writing the ints back would mutate caller data.
        x1, y1, x2, y2 = (int(v) for v in box['bbox'])
        # One random colour per box, shared by the rectangle and its caption.
        box_color = tuple(random.choices(channel_levels, k=3))
        cv2.rectangle(img, (x1, y1), (x2, y2), color=box_color, thickness=2)
        if show_caption:
            caption = box['caption']
            (_, text_height), _ = cv2.getTextSize(caption, font, fontScale=0.5, thickness=2)
            # putText anchors at the text's bottom-left corner, so shift down
            # by the text height to keep the caption inside the box.
            cv2.putText(img, caption, (x1, y1 + text_height), font,
                        fontScale=0.5, color=box_color, thickness=2)
    cv2.imwrite(save_name, img)
    # For interactive inspection: cv2.imshow('visualise', img); cv2.waitKey(0)
def parse_bbox(anno, image_id: Union[int, str, None] = None):
    """Load one image's predicted boxes from a JSON predictions file.

    Args:
        anno: Path to a JSON file that maps image ids to their predictions.
        image_id: Id of the image to look up. When ``None``, the first entry
            in the file is used. JSON object keys are always strings, so an
            integer id that is not found as-is is retried in its string form.

    Returns:
        The value stored under the selected image id.

    Raises:
        KeyError: If the id is not present in the file.
    """
    with open(anno, 'r') as f:
        predictions = json.load(f)
    if image_id is None:
        image_id = next(iter(predictions))
    elif image_id not in predictions and str(image_id) in predictions:
        # e.g. image_id=63 when the file stores the key "63".
        image_id = str(image_id)
    return predictions[image_id]
def gt_bbox(anno, img_name: Union[int, str, None] = None):
    """Collect the ground-truth boxes of one image from an annotation file.

    Args:
        anno: Path to a JSON file with an ``'annotations'`` list whose items
            carry ``'image_id'``, ``'bbox'`` (``[x, y, w, h]``) and
            ``'caption'``.
        img_name: Integer image id, or an image file name/path whose stem is
            that id (e.g. ``'63.jpg'``).

    Returns:
        ``[{'bbox': [x1, y1, x2, y2], 'caption': str}, ...]`` for every
        annotation whose ``'image_id'`` equals the requested id.

    Raises:
        TypeError: If ``img_name`` is ``None``.
        ValueError: If the file-name stem is not an integer.
    """
    if img_name is None:
        raise TypeError('img_name is required, e.g. "63.jpg" or 63')
    if isinstance(img_name, str):
        # Drop any directory and extension: 'imgs/63.jpg' -> '63'.
        stem = os.path.splitext(os.path.basename(img_name))[0]
        image_id = int(stem)
    else:
        image_id = int(img_name)

    with open(anno, 'r') as f:
        annotations = json.load(f)['annotations']

    gt = []
    for annotation in annotations:
        if annotation['image_id'] != image_id:
            continue
        x1, y1, w, h = annotation['bbox']
        # Convert [x, y, w, h] into the corner format draw_bbox consumes.
        gt.append({'bbox': [x1, y1, x1 + w, y1 + h], 'caption': annotation['caption']})
    return gt
if __name__ == '__main__':
    # Demo: render ground-truth and predicted boxes for one image into
    # GT.jpg and Pred.jpg.
    img_name = '63.jpg'
    show_caption = True
    anno = 'vg_dense_captioning_blip2_top48_0.88_1000_0.96_debugTrue_predictions_shard_all.json'
    img = cv2.imread(img_name)
    # cv2.imread returns None rather than raising when the file cannot be read.
    if img is None:
        raise FileNotFoundError(f'could not read image: {img_name}')
    # NOTE(review): without an image_id, parse_bbox returns the first entry of
    # the predictions file, which is not necessarily the entry for img_name —
    # confirm the key format and pass the id explicitly if needed.
    examp_bbox = parse_bbox(anno)
    ground_truth_bbox = gt_bbox('test.json', img_name)
    # draw_bbox draws on the array it is given, so hand each call its own copy;
    # otherwise Pred.jpg would also contain the ground-truth boxes.
    draw_bbox(img.copy(), 'GT.jpg', ground_truth_bbox, show_caption)
    draw_bbox(img.copy(), 'Pred.jpg', examp_bbox, show_caption)