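# NOTE: the next three lines are Hugging Face web-page residue (uploader avatar
# caption, commit message, commit hash), not module code; kept here as comments.
# zhouyik's picture
# Upload folder using huggingface_hub
# 032e687 verified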
from PIL import Image, ImageDraw
import random, math
import numpy as np
from shapely.ops import unary_union
from shapely.geometry import Point, Polygon
from scipy.stats import multivariate_normal
from pycocotools import mask
import cv2
import copy
from typing import Tuple
# Named RGB triples (0-255 per channel), keyed by colour name.
# Not referenced by the functions visible in this file; presumably callers pick
# an ``rgb_value`` from it -- verify against the call sites.
color_pool = dict(
    red=(255, 0, 0),
    lime=(0, 255, 0),
    blue=(0, 0, 255),
    yellow=(255, 255, 0),
    fuchsia=(255, 0, 255),
    aqua=(0, 255, 255),
    orange=(255, 165, 0),
    purple=(128, 0, 128),
    gold=(255, 215, 0),
)
def get_random_point_within_polygon(polygon):
    """Sample a random point inside ``polygon`` by rejection sampling.

    Candidates are drawn uniformly from the polygon's bounding box
    (``polygon.bounds``) and tested with ``polygon.contains``.

    Args:
        polygon: Geometry exposing ``bounds`` and ``contains`` (the file builds
            shapely ``Polygon`` objects for this).

    Returns:
        ``(x, y)``. After 50 rejected candidates one more point is drawn from
        the bounding box and returned without a containment check, so the
        result is not guaranteed to lie inside the polygon.
    """
    x_lo, y_lo, x_hi, y_hi = polygon.bounds

    # Bounded rejection sampling: at most 50 containment tests.
    for _ in range(50):
        x = np.random.uniform(x_lo, x_hi)
        y = np.random.uniform(y_lo, y_hi)
        if polygon.contains(Point(x, y)):
            return x, y

    # Give up on containment; any point of the bounding box will do.
    x = np.random.uniform(x_lo, x_hi)
    y = np.random.uniform(y_lo, y_hi)
    return x, y
def get_random_point_within_bbox(bbox):
    """Sample a point uniformly at random inside an axis-aligned box.

    Args:
        bbox: Four numbers unpacked as ``(left, top, right, bottom)``.

    Returns:
        ``(x, y)`` drawn with ``np.random.uniform``; ``x`` is drawn before ``y``.
    """
    x_min, y_min, x_max, y_max = bbox
    # Tuple elements are evaluated left to right, so x consumes randomness first.
    return np.random.uniform(x_min, x_max), np.random.uniform(y_min, y_max)
def is_max_angle_less_than_150(points):
    """Check that no interior angle of a triangle exceeds 150 degrees.

    Each angle is computed with the law of cosines from the three side lengths.

    Args:
        points: Sequence whose first three items are ``(x, y)`` vertices.

    Returns:
        ``False`` as soon as an angle greater than 150 degrees is found,
        otherwise ``True``. When two vertices coincide the angle is undefined
        (NaN); NaN is not greater than 150, so such degenerate triangles are
        accepted.
    """
    vertices = [np.asarray(points[i], dtype=float) for i in range(3)]
    for i in range(3):
        prev_v = vertices[i]
        apex = vertices[(i + 1) % 3]
        next_v = vertices[(i + 2) % 3]

        side_a = np.linalg.norm(next_v - apex)
        side_b = np.linalg.norm(prev_v - next_v)
        side_c = np.linalg.norm(prev_v - apex)

        # A zero-length side gives 0/0 = NaN; silence the warning, the NaN is
        # handled by the comparison below.
        with np.errstate(divide='ignore', invalid='ignore'):
            cos_apex = (side_a**2 + side_c**2 - side_b**2) / (2 * side_a * side_c)

        # Rounding can push the cosine of a (near-)collinear triple slightly
        # outside [-1, 1]; arccos would then return NaN and a ~180 degree angle
        # would slip through the check. Clamp first (NaN stays NaN).
        cos_apex = np.clip(cos_apex, -1.0, 1.0)

        if np.degrees(np.arccos(cos_apex)) > 150:
            return False
    return True
def draw_rectangle(canvas, bbox_coord, outline_color, width):
    """Outline an axis-aligned rectangle on ``canvas``.

    Args:
        canvas: Drawing object exposing ``rectangle``
            (``image_blending`` passes a PIL ``ImageDraw.Draw`` instance).
        bbox_coord: Four numbers unpacked as ``(left, top, right, bottom)``.
        outline_color: Forwarded as ``outline``.
        width: Forwarded as the outline ``width``.
    """
    x0, y0, x1, y1 = bbox_coord
    top_left, bottom_right = (x0, y0), (x1, y1)
    canvas.rectangle([top_left, bottom_right], outline=outline_color, width=width)
def draw_ellipse(canvas, bbox_coord, mask_polygon, outline_color, width, size_ratio=1, aspect_ratio=1.0):
    """Outline an ellipse centred on the mask's (or the box's) bounding box.

    Args:
        canvas: Drawing object exposing ``ellipse``
            (``image_blending`` passes a PIL ``ImageDraw.Draw`` instance).
        bbox_coord: ``(left, top, right, bottom)``; used when no usable mask
            geometry is given.
        mask_polygon: Geometry exposing ``bounds`` (``minx, miny, maxx, maxy``),
            or ``None``. An empty geometry (``is_empty`` true) has no usable
            bounds and is treated like ``None``.
        outline_color: Forwarded as ``outline``.
        width: Forwarded as the outline ``width``.
        size_ratio: Scale applied to both axes of the reference box.
        aspect_ratio: Multiplies the width and divides the height.
    """
    # Identity comparison instead of ``!= None``: it never invokes the
    # geometry's own rich-comparison methods.
    has_mask = mask_polygon is not None and not getattr(mask_polygon, 'is_empty', False)
    minx, miny, maxx, maxy = mask_polygon.bounds if has_mask else bbox_coord

    # The ellipse keeps the centre of the reference box ...
    center_x = (maxx + minx) / 2
    center_y = (maxy + miny) / 2

    # ... while its axes are rescaled.
    half_width = (maxx - minx) * size_ratio * aspect_ratio / 2
    half_height = (maxy - miny) * size_ratio / aspect_ratio / 2

    ellipse_box = [
        center_x - half_width,
        center_y - half_height,
        center_x + half_width,
        center_y + half_height,
    ]
    canvas.ellipse(ellipse_box, outline=outline_color, width=width)
def draw_arrow(canvas, bbox_coord, outline_color, line_width, max_arrow_length=100):
    """Draw a randomly oriented arrow anchored near the centre of a bounding box.

    Args:
        canvas: Drawing object exposing ``line`` and ``polygon``
            (``image_blending`` passes a PIL ``ImageDraw.Draw`` instance).
        bbox_coord: Four numbers unpacked as ``(left, top, right, bottom)``.
        outline_color: Colour passed as ``fill`` to every drawing call.
        line_width: Stroke width of the shaft and of the open arrow head.
        max_arrow_length: One bound of the arrow-length range; the other bound
            is 0.8 x the shorter bbox side.

    NOTE(review): the nesting of a few statements below is reconstructed from
    context (see the inline notes) -- confirm against the upstream file.
    """
    left, top, right, bottom = bbox_coord
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    # Arrow length related to the bounding box size
    bounding_box_size_length = min(right - left, bottom - top)
    if 0.8 * bounding_box_size_length > max_arrow_length:
        min_arrow_length = 0.8 * bounding_box_size_length
    else:
        min_arrow_length = max_arrow_length
        # NOTE(review): assumed to belong to the ``else`` branch. With that
        # nesting both branches end up sampling between 0.8 x side and the
        # caller's max_arrow_length (random.uniform accepts bounds in either
        # order); only the argument order differs.
        max_arrow_length = 0.8 * bounding_box_size_length
    arrow_length = random.uniform(min_arrow_length, max_arrow_length)
    # Randomize the arrow angle
    angle = random.uniform(0, 2 * math.pi)
    # Jitter the anchor by up to a quarter of the box size in each direction.
    center_x += random.uniform(-0.25, 0.25) * (right - left)
    center_y += random.uniform(-0.25, 0.25) * (bottom - top)
    # Arrowhead size related to arrow length
    arrow_head_size = max(random.uniform(0.2, 0.5) * arrow_length, 6)
    # Recalculate the arrow end to ensure it connects properly with the arrowhead
    arrow_end_x = center_x + (arrow_length - arrow_head_size) * math.cos(angle)
    arrow_end_y = center_y + (arrow_length - arrow_head_size) * math.sin(angle)
    if random.random() < 0.5:
        # Draw with a "wobble" to mimic human drawing
        mid_x = (center_x + arrow_end_x) / 2 + random.uniform(-5, 5)
        mid_y = (center_y + arrow_end_y) / 2 + random.uniform(-5, 5)
        canvas.line([(center_x, center_y), (mid_x, mid_y), (arrow_end_x, arrow_end_y)],
                    fill=outline_color, width=line_width)
    else:
        # Draw the arrow line
        canvas.line([(center_x, center_y), (arrow_end_x, arrow_end_y)], fill=outline_color, width=line_width)
    # NOTE(review): assumed to run for both shaft styles. It moves the head's
    # apex from the far end of the shaft to the jittered anchor point, so the
    # head's two wings open along the shaft direction.
    arrow_end_x = center_x
    arrow_end_y = center_y
    # Draw the arrow head
    if random.random() < 0.5:
        # Filled triangular head: wings at +/- 60 degrees around ``angle``.
        canvas.polygon([
            (arrow_end_x + arrow_head_size * math.cos(angle + math.pi / 3),
             arrow_end_y + arrow_head_size * math.sin(angle + math.pi / 3)),
            (arrow_end_x, arrow_end_y),
            (arrow_end_x + arrow_head_size * math.cos(angle - math.pi / 3),
             arrow_end_y + arrow_head_size * math.sin(angle - math.pi / 3))
        ], fill=outline_color)
    else:
        # Open "V" head drawn as a two-segment polyline through the apex.
        canvas.line([
            (arrow_end_x + arrow_head_size * math.cos(angle + math.pi / 3),
             arrow_end_y + arrow_head_size * math.sin(angle + math.pi / 3)),
            (arrow_end_x, arrow_end_y),
            (arrow_end_x + arrow_head_size * math.cos(angle - math.pi / 3),
             arrow_end_y + arrow_head_size * math.sin(angle - math.pi / 3))
        ], fill=outline_color, width=line_width)
def draw_rounded_triangle(canvas, bbox_coord, mask_polygon, outline_color, width):
    """Draw a random triangle outline with curved joints inside a region.

    Three vertices are sampled from ``mask_polygon`` when it is given,
    otherwise from ``bbox_coord``; samples are redrawn until
    ``is_max_angle_less_than_150`` accepts them.

    Args:
        canvas: Drawing object exposing ``line``
            (``image_blending`` passes a PIL ``ImageDraw.Draw`` instance).
        bbox_coord: ``(left, top, right, bottom)``; used when ``mask_polygon``
            is ``None``.
        mask_polygon: Geometry to sample the vertices from, or ``None``.
        outline_color: Forwarded as ``fill`` of the polyline.
        width: Stroke width.
    """
    # Cap the resampling: a degenerate region (e.g. a box of zero height) only
    # yields collinear points, which would otherwise loop forever. After the
    # cap the last sample is drawn as is.
    max_attempts = 100
    for _ in range(max_attempts):
        if mask_polygon is not None:
            points = [get_random_point_within_polygon(mask_polygon) for _ in range(3)]
        else:
            # A bbox is a plain 4-sequence without ``.bounds``; it needs the
            # bbox sampler, not the polygon sampler.
            points = [get_random_point_within_bbox(bbox_coord) for _ in range(3)]
        if is_max_angle_less_than_150(points):
            break

    # Close the outline by returning to the first vertex.
    canvas.line([points[0], points[1], points[2], points[0]],
                fill=outline_color, width=width, joint='curve')
def draw_point(canvas, bbox_coord, mask_polygon, outline_color=(255, 0, 0), radius=3, aspect_ratio=1.0):
    """Draw a filled dot at a random location near the centre of a region.

    The centre is sampled from a 2-D normal distribution whose mean is the
    centre of the region's bounding box and whose variances are one eighth of
    the box width and height.

    Args:
        canvas: Drawing object exposing ``ellipse``
            (``image_blending`` passes a PIL ``ImageDraw.Draw`` instance).
        bbox_coord: ``(left, top, right, bottom)``; used when ``mask_polygon``
            is ``None``.
        mask_polygon: Geometry exposing ``bounds`` and ``contains``, or ``None``.
        outline_color: Used for both ``outline`` and ``fill``.
        radius: Base radius of the dot.
        aspect_ratio: The x radius is ``radius * aspect_ratio`` and the y
            radius is ``radius / aspect_ratio``.
    """
    if mask_polygon is not None:
        minx, miny, maxx, maxy = mask_polygon.bounds
    else:
        minx, miny, maxx, maxy = bbox_coord

    mean = [(maxx + minx) / 2, (maxy + miny) / 2]
    cov = [[(maxx - minx) / 8, 0], [0, (maxy - miny) / 8]]

    # Rejection-sample a centre inside the mask. Without a mask there is
    # nothing to test against, so the first sample is accepted (previously this
    # path called ``None.contains`` and crashed).
    max_tries = 10
    for _ in range(max_tries):
        cx, cy = multivariate_normal.rvs(mean=mean, cov=cov)
        if mask_polygon is None or mask_polygon.contains(Point(cx, cy)):
            break
    else:
        # Fail-safe: every try was rejected, take one more sample unchecked.
        cx, cy = multivariate_normal.rvs(mean=mean, cov=cov)

    x_radius = radius * aspect_ratio
    y_radius = radius / aspect_ratio
    bbox = [cx - x_radius, cy - y_radius, cx + x_radius, cy + y_radius]
    # Draw the ellipse and fill it with color
    canvas.ellipse(bbox, outline=outline_color, fill=outline_color)
def draw_scribble(canvas, bbox_coord, mask_polygon, outline_color=(255, 0, 0), width=3):
    """Draw a random cubic Bezier "scribble" inside a region.

    Four control points are sampled from ``mask_polygon`` when it is given,
    otherwise from ``bbox_coord``. The curve is evaluated at 1000 evenly spaced
    parameter values and drawn as 999 short line segments.

    Args:
        canvas: Drawing object exposing ``line``
            (``image_blending`` passes a PIL ``ImageDraw.Draw`` instance).
        bbox_coord: ``(left, top, right, bottom)``; used when ``mask_polygon``
            is ``None``.
        mask_polygon: Geometry to sample the control points from, or ``None``.
        outline_color: Forwarded as ``fill`` of every segment.
        width: Stroke width of every segment.
    """
    # Identity comparison instead of ``!= None``: it never invokes the
    # geometry's own rich-comparison methods.
    if mask_polygon is not None:
        p0, p1, p2, p3 = (get_random_point_within_polygon(mask_polygon) for _ in range(4))
    else:
        p0, p1, p2, p3 = (get_random_point_within_bbox(bbox_coord) for _ in range(4))

    # Evaluate the cubic Bezier curve using the Bernstein weights.
    curve = []
    for t in np.linspace(0, 1, 1000):
        s = 1 - t
        w0, w1, w2, w3 = s**3, 3 * s**2 * t, 3 * s * t**2, t**3
        curve.append((
            w0 * p0[0] + w1 * p1[0] + w2 * p2[0] + w3 * p3[0],
            w0 * p0[1] + w1 * p1[1] + w2 * p2[1] + w3 * p3[1],
        ))

    # One call per consecutive pair keeps the rasterisation identical to
    # drawing the curve segment by segment.
    for start, end in zip(curve, curve[1:]):
        canvas.line([start, end], fill=outline_color, width=width)
def draw_mask_contour(canvas, bbox_coord, segmentation_coords, color="red", width=1):
    """Outline every polygon of a segmentation, thickened by pixel shifts.

    Each polygon is drawn ``(2 * width + 1) ** 2`` times, shifted by every
    integer offset in ``[-width, width]`` along x and y.

    Args:
        canvas: Drawing object exposing ``polygon``
            (``image_blending`` passes a PIL ``ImageDraw.Draw`` instance).
        bbox_coord: ``(left, top, right, bottom)``; its rectangle is outlined
            when ``segmentation_coords`` is ``None``.
        segmentation_coords: Iterable of flat ``[x0, y0, x1, y1, ...]``
            sequences, or ``None``.
        color: Forwarded as ``outline``.
        width: Maximum shift in pixels along each axis.
    """
    # ``is None`` rather than ``== None``: the latter is evaluated elementwise
    # on a NumPy array and cannot be used as a condition.
    if segmentation_coords is None:
        segmentation_coords = [[bbox_coord[0], bbox_coord[1], bbox_coord[0], bbox_coord[3],
                                bbox_coord[2], bbox_coord[3], bbox_coord[2], bbox_coord[1]]]

    shifts = range(-width, width + 1)
    for segment in segmentation_coords:
        coords = [(segment[i], segment[i + 1]) for i in range(0, len(segment), 2)]
        # A contour of a single pixel has one vertex; Pillow's polygon needs at
        # least two coordinates, so such contours are skipped.
        if len(coords) < 2:
            continue
        for dx in shifts:
            for dy in shifts:
                canvas.polygon([(x + dx, y + dy) for x, y in coords], outline=color)
def draw_mask(canvas, bbox_coord, segmentation_coords, color="red", width=1):
    """Fill every polygon of a segmentation with ``color``.

    Args:
        canvas: Drawing object exposing ``polygon`` (presumably a PIL
            ``ImageDraw.Draw`` instance, as for the other draw helpers).
        bbox_coord: ``(left, top, right, bottom)``; its rectangle is filled
            when ``segmentation_coords`` is ``None``, the same fallback
            ``draw_mask_contour`` uses.
        segmentation_coords: Iterable of flat ``[x0, y0, x1, y1, ...]``
            sequences, or ``None``.
        color: Forwarded as ``fill``.
        width: Forwarded as ``width`` (no outline colour is set).
    """
    if segmentation_coords is None:
        segmentation_coords = [[bbox_coord[0], bbox_coord[1], bbox_coord[0], bbox_coord[3],
                                bbox_coord[2], bbox_coord[3], bbox_coord[2], bbox_coord[1]]]

    for segment in segmentation_coords:
        # Pair up the flat coordinate list into (x, y) vertices.
        coords = [(segment[i], segment[i + 1]) for i in range(0, len(segment), 2)]
        canvas.polygon(coords, outline=None, fill=color, width=width)
def image_blending(image, shape='rectangle', bbox_coord=None, segmentation=None,
                   ori_height=None, ori_width=None, alpha=None, rgb_value=None):
    """Overlay one randomly styled visual prompt on ``image``.

    The prompt is drawn on a transparent RGBA layer of size
    ``(ori_width, ori_height)`` and alpha-composited onto ``image``.

    Args:
        image: PIL image to annotate; it is converted to RGBA and composited
            with the overlay, so it is expected to have the overlay's size.
        shape: One of ``'rectangle'``, ``'ellipse'``, ``'arrow'``,
            ``'triangle'``, ``'point'``, ``'scribble'``, ``'mask_contour'``.
            (``'mask'`` only influences the random alpha range; there is no
            drawing branch for it.)
        bbox_coord: ``(left, top, right, bottom)`` of the target region.
        segmentation: A COCO RLE dict (uncompressed ``'counts'`` lists are
            converted with ``frPyObjects``), a non-empty list of flat
            ``[x0, y0, x1, y1, ...]`` polygons, or a falsy value for "none".
        ori_height: Overlay height in pixels.
        ori_width: Overlay width in pixels.
        alpha: Overlay opacity (0-255); drawn at random when ``None``.
        rgb_value: RGB sequence of the prompt colour. Required in practice:
            the ``None`` default raises ``TypeError``.

    Returns:
        ``(blended, region)``: the composited image in RGB mode and a
        ``uint8`` array that is 1 where the overlay's RGB channel sum
        exceeds 10.

    Raises:
        NotImplementedError: If ``shape`` is not one of the supported values.
    """
    visual_prompt_img = Image.new('RGBA', (ori_width, ori_height), (0, 0, 0, 0))
    visual_prompt_img_canvas = ImageDraw.Draw(visual_prompt_img)
    if alpha is None:
        alpha = random.randint(96, 255) if shape != 'mask' else random.randint(48, 128)
    # tuple() lets callers pass the colour as a list as well as a tuple.
    color_alpha = tuple(rgb_value) + (alpha, )

    # Geometry derived from the segmentation. Everything defaults to None so
    # that a failure below can never leave a name unbound.
    contours = None
    mask_polygon = None
    all_polygons_union = None
    pick_random_polygon = False

    if isinstance(segmentation, dict):
        if isinstance(segmentation['counts'], list):
            # convert to compressed RLE
            segmentation = mask.frPyObjects(segmentation, ori_height, ori_width)
        m = mask.decode(segmentation)
        m = m.astype(np.uint8).squeeze()
        contours, _hierarchy = cv2.findContours(m, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contours = [contour.flatten() for contour in contours]
        # RLE masks: the representative polygon is one random outer contour.
        pick_random_polygon = True
    elif segmentation:
        # Polygon lists: the representative polygon is the first one.
        contours = segmentation

    if contours is not None:
        try:
            polygons = [
                Polygon([(flat[i], flat[i + 1]) for i in range(0, len(flat), 2)])
                for flat in contours
            ]
            mask_polygon = random.choice(polygons) if pick_random_polygon else polygons[0]
        except Exception:
            # Best effort: e.g. a contour with too few vertices or no contour
            # at all. Fall back to bbox-only prompts.
            mask_polygon = None
        else:
            try:
                all_polygons_union = unary_union(polygons)
            except Exception:
                all_polygons_union = None

    if shape == 'rectangle':
        line_width = random.choice([2, 3, 4, 5, 6, 7, 8])
        draw_rectangle(visual_prompt_img_canvas, bbox_coord, color_alpha, line_width)
    elif shape == 'ellipse':
        line_width = random.choice([2, 3, 4, 5, 6, 7, 8])
        size_ratio = random.uniform(1, 1.5)
        draw_ellipse(visual_prompt_img_canvas, bbox_coord, all_polygons_union,
                     color_alpha, line_width, size_ratio=size_ratio)
    elif shape == 'arrow':
        line_width = random.choice([1, 2, 3, 4, 5, 6])
        max_arrow_length = 50
        draw_arrow(visual_prompt_img_canvas, bbox_coord, color_alpha, line_width, max_arrow_length)
    elif shape == 'triangle':
        line_width = random.choice([2, 3, 4, 5, 6, 7, 8])
        draw_rounded_triangle(visual_prompt_img_canvas, bbox_coord, all_polygons_union, color_alpha, line_width)
    elif shape == 'point':
        radius = random.choice(list(range(3, 10)))
        aspect_ratio = 1 if random.random() < 0.5 else random.uniform(0.5, 2.0)
        draw_point(visual_prompt_img_canvas, bbox_coord, mask_polygon, color_alpha, radius, aspect_ratio)
    elif shape == 'scribble':
        line_width = random.choice(list(range(2, 13)))
        draw_scribble(visual_prompt_img_canvas, bbox_coord, mask_polygon, color_alpha, line_width)
    elif shape == 'mask_contour':
        line_width = random.choice([1, 2, 3, 4])
        draw_mask_contour(visual_prompt_img_canvas, bbox_coord, contours, color_alpha, line_width)
    else:
        raise NotImplementedError

    image = image.convert('RGBA')
    image = Image.alpha_composite(image, visual_prompt_img)
    image = image.convert('RGB')

    # Binary footprint of the prompt: pixels whose RGB channel sum exceeds 10.
    visual_prompt_img = np.array(visual_prompt_img.convert('RGB'))
    visual_prompt_img = np.uint8(np.sum(visual_prompt_img, axis=-1) > 10)
    return image, visual_prompt_img
def point_rendering(points, colors, ori_height, ori_width):
    """Render groups of dots on a black canvas, one colour per group.

    Args:
        points: Sequence of groups; each group is an iterable of points whose
            first two items are ``(cx, cy)``.
        colors: Sequence of RGB triples with channels scaled by 255 here
            (presumably floats in ``[0, 1]``). ``colors[i]`` is used for group
            ``i``; a colour that maps to pure black is replaced by
            ``colors[-1]``.
        ori_height: Canvas height in pixels.
        ori_width: Canvas width in pixels.

    Returns:
        ``(regions, merged)``: ``regions`` is a ``uint8`` array of shape
        ``(n, ori_height, ori_width)`` that is 1 where group ``i``'s overlay
        has an RGB channel sum above 10 (an empty ``(0, H, W)`` array when
        ``points`` is empty); ``merged`` is the RGB PIL image with all groups
        alpha-composited onto black.
    """
    def to_rgb255(rgb):
        return tuple(int(rgb[k] * 255) for k in range(3))

    size = (ori_width, ori_height)
    radius = random.choice(range(3, 11))
    aspect_ratio = 1 if random.random() < 0.5 else random.uniform(0.5, 2.0)
    alpha = random.randint(96, 255)

    # NOTE(review): both radii are multiplied by aspect_ratio, so the dots stay
    # circular and aspect_ratio only acts as a size factor, whereas draw_point
    # divides the y radius by it. Kept as is -- confirm which is intended.
    x_radius = radius * aspect_ratio
    y_radius = radius * aspect_ratio

    # Keep the merged canvas in RGBA while compositing: the base is opaque, so
    # this gives the same pixels as converting to RGB and back on every group.
    merged_visual_prompts = Image.new('RGBA', size, (0, 0, 0, 255))
    _regions = []
    for i, point in enumerate(points):
        vprompt_img = Image.new('RGBA', size, (0, 0, 0, 0))
        canvas = ImageDraw.Draw(vprompt_img)

        color = to_rgb255(colors[i])
        if color == (0, 0, 0):
            # Black would be invisible on the black canvas and in the mask.
            color = to_rgb255(colors[-1])
        color_alpha = color + (alpha, )

        for _point in point:
            cx, cy = _point[0], _point[1]
            bbox = [cx - x_radius, cy - y_radius, cx + x_radius, cy + y_radius]
            canvas.ellipse(bbox, outline=color_alpha, fill=color_alpha)

        merged_visual_prompts = Image.alpha_composite(merged_visual_prompts, vprompt_img)
        overlay_rgb = np.array(vprompt_img.convert('RGB'))
        _regions.append(np.uint8(np.sum(overlay_rgb, axis=-1) > 10))

    merged_visual_prompts = merged_visual_prompts.convert('RGB')
    if not _regions:
        # np.stack rejects an empty list; return an empty stack instead.
        return np.zeros((0, ori_height, ori_width), dtype=np.uint8), merged_visual_prompts
    _regions = np.stack(_regions, axis=0)  # n, h, w
    return _regions, merged_visual_prompts
def box_rendering(boxes, colors, ori_height, ori_width):
    """Render box outlines on a black canvas, one colour per box.

    Args:
        boxes: Sequence of boxes, each unpacked as
            ``(left, top, right, bottom)``.
        colors: Sequence of RGB triples with channels scaled by 255 here
            (presumably floats in ``[0, 1]``). ``colors[i]`` is used for box
            ``i``; a colour that maps to pure black is replaced by
            ``colors[-1]``.
        ori_height: Canvas height in pixels.
        ori_width: Canvas width in pixels.

    Returns:
        ``(regions, merged)``: ``regions`` is a ``uint8`` array of shape
        ``(n, ori_height, ori_width)`` that is 1 where box ``i``'s overlay has
        an RGB channel sum above 10 (an empty ``(0, H, W)`` array when
        ``boxes`` is empty); ``merged`` is the RGB PIL image with all boxes
        alpha-composited onto black.
    """
    def to_rgb255(rgb):
        return tuple(int(rgb[k] * 255) for k in range(3))

    size = (ori_width, ori_height)
    alpha = random.randint(96, 255)
    line_width = random.choice([2, 3, 4, 5, 6, 7])

    # Keep the merged canvas in RGBA while compositing: the base is opaque, so
    # this gives the same pixels as converting to RGB and back on every box.
    merged_visual_prompts = Image.new('RGBA', size, (0, 0, 0, 255))
    _regions = []
    for i, box in enumerate(boxes):
        vprompt_img = Image.new('RGBA', size, (0, 0, 0, 0))
        canvas = ImageDraw.Draw(vprompt_img)

        color = to_rgb255(colors[i])
        if color == (0, 0, 0):
            # Black would be invisible on the black canvas and in the mask.
            color = to_rgb255(colors[-1])
        color_alpha = color + (alpha, )

        left, top, right, bottom = box
        canvas.rectangle([(left, top), (right, bottom)], outline=color_alpha, width=line_width)

        merged_visual_prompts = Image.alpha_composite(merged_visual_prompts, vprompt_img)
        overlay_rgb = np.array(vprompt_img.convert('RGB'))
        _regions.append(np.uint8(np.sum(overlay_rgb, axis=-1) > 10))

    merged_visual_prompts = merged_visual_prompts.convert('RGB')
    if not _regions:
        # np.stack rejects an empty list; return an empty stack instead.
        return np.zeros((0, ori_height, ori_width), dtype=np.uint8), merged_visual_prompts
    _regions = np.stack(_regions, axis=0)  # n, h, w
    return _regions, merged_visual_prompts
from distinctipy import distinctipy
def contour_rendering(image, masks, mask_ids=None):
    """Draw mask contours and numeric labels onto ``image`` in place.

    For every mask the contours are drawn in a distinct colour and a label is
    placed at the centroid of the largest contour, on a filled rectangle of the
    same colour. All labels share the smallest font scale chosen for any mask.

    Args:
        image: Image accepted by the OpenCV drawing calls; modified in place.
            Colour channels are passed in reversed order relative to
            distinctipy's output (presumably RGB -> BGR -- confirm).
        masks: Sequence of single-channel masks accepted by
            ``cv2.findContours``.
        mask_ids: Optional labels, one per mask; defaults to ``1..n``.

    Returns:
        None.
    """
    if len(masks) == 0:
        # Nothing to draw (and ``min`` below would fail on an empty list).
        return None

    # One spare colour is requested to replace a near-white one.
    colors = distinctipy.get_colors(len(masks) + 1)
    font = cv2.FONT_HERSHEY_SIMPLEX
    text_thickness = 2

    font_scale_list = []
    labels = []  # (text, colour, anchor, thickness) per labelled mask

    for anno_i, binary_mask in enumerate(masks):
        contours, _hierarchy = cv2.findContours(binary_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours) == 0:
            # Empty mask: nothing to outline and no centroid to label.
            continue

        rgb = colors[anno_i]
        if rgb[0] > 0.9 and rgb[1] > 0.9 and rgb[2] > 0.9:
            # Near-white would clash with the white label text.
            rgb = colors[-1]
        color_anno_i = (rgb[2] * 255, rgb[1] * 255, rgb[0] * 255)
        cv2.drawContours(image, contours, -1, color=color_anno_i, thickness=2)

        # The label is anchored on the largest contour (first one on ties).
        areas = [cv2.contourArea(cnt) for cnt in contours]
        select_cnt = int(np.argmax(np.array(areas))) if len(areas) > 1 else 0
        largest = contours[select_cnt]
        moments = cv2.moments(largest)
        x, y, boxW, boxH = cv2.boundingRect(largest)
        if moments["m00"] > 0:
            select_centroid = (int(moments["m10"] / moments["m00"]),
                               int(moments["m01"] / moments["m00"]))
        else:
            # Zero-area contour: fall back to the bounding-box centre.
            select_centroid = (x + boxW / 2, y + boxH / 2)

        visual_prompt_id = anno_i + 1 if mask_ids is None else mask_ids[anno_i]
        text = f"{visual_prompt_id}"
        thickness = 1 if max(boxH, boxW) < 25 else text_thickness

        # Largest font scale (5.9 down to 0.5) whose text is at most 15% of the
        # bounding box in both width and height; 0.5 when none fits.
        chosen_scale = 0.5
        for scale in range(59, 4, -1):
            (textW, textH), _ = cv2.getTextSize(text, font, scale / 10, thickness)
            if textH / boxH <= 0.15 and textW / boxW <= 0.15:
                chosen_scale = scale / 10
                break
        font_scale_list.append(chosen_scale)

        (base_w, base_h), bottom = cv2.getTextSize(text, font, chosen_scale, thickness)
        anchor = (
            int(select_centroid[0] - base_w / 2),
            int(select_centroid[1] + (base_h + bottom) / 2),
        )
        # Keep each label's own thickness; the drawing loop below must not
        # reuse whatever value the last mask happened to leave behind.
        labels.append((text, color_anno_i, anchor, thickness))

    if not font_scale_list:
        return None

    font_scale = min(font_scale_list)
    for text, color, anchor, thickness in labels:
        (base_w, base_h), bottom = cv2.getTextSize(text, font, font_scale, thickness)
        cv2.rectangle(image, (anchor[0], int(anchor[1] - base_h - bottom / 2)),
                      (anchor[0] + base_w, int(anchor[1] + bottom / 2)),
                      color, -1, 8)
        cv2.putText(image, text, anchor, font, font_scale,
                    (255, 255, 255), thickness)
    return None