# DenseLabelDev/projects/mllm_labeling/datasets/utils/image_blending_fn.py
#
# Upload folder using huggingface_hub
#
# 032e687 verified 7 months ago
#
# 19.3 kB

	from PIL import Image, ImageDraw
	import random, math
	import numpy as np
	from shapely.ops import unary_union
	from shapely.geometry import Point, Polygon
	from scipy.stats import multivariate_normal
	from pycocotools import mask
	import cv2
	import copy
	from typing import Tuple


	# Named colors mapped to (R, G, B) tuples with 0-255 channel values.
	color_pool = {
	'red': (255, 0, 0),
	'lime': (0, 255, 0),
	'blue': (0, 0, 255),
	'yellow': (255, 255, 0),
	'fuchsia': (255, 0, 255),
	'aqua': (0, 255, 255),
	'orange': (255, 165, 0),
	'purple': (128, 0, 128),
	'gold': (255, 215, 0),

	}


	def get_random_point_within_polygon(polygon):
	    """Sample a point inside ``polygon`` by rejection sampling on its bounds.

	    Up to 50 candidates are drawn uniformly from the bounding rectangle given
	    by ``polygon.bounds``; the first one for which ``polygon.contains`` is
	    true is returned. If none is accepted, one more candidate is drawn from
	    the bounding rectangle and returned without a containment check.

	    Returns:
	        An ``(x, y)`` tuple.
	    """
	    x_lo, y_lo, x_hi, y_hi = polygon.bounds
	    for _ in range(50):
	        cand_x = np.random.uniform(x_lo, x_hi)
	        cand_y = np.random.uniform(y_lo, y_hi)
	        if polygon.contains(Point(cand_x, cand_y)):
	            return cand_x, cand_y
	    # Every trial was rejected: settle for any point of the bounding rectangle.
	    fallback_x = np.random.uniform(x_lo, x_hi)
	    fallback_y = np.random.uniform(y_lo, y_hi)
	    return fallback_x, fallback_y

	def get_random_point_within_bbox(bbox):
	    """Return a uniformly sampled ``(x, y)`` inside ``bbox``.

	    ``bbox`` is unpacked as ``(left, top, right, bottom)``; x is drawn first,
	    then y.
	    """
	    x_min, y_min, x_max, y_max = bbox
	    sample_x = np.random.uniform(x_min, x_max)
	    sample_y = np.random.uniform(y_min, y_max)
	    return sample_x, sample_y

	def is_max_angle_less_than_150(points):
	    """Return True when no interior angle of the triangle exceeds 150 degrees.

	    Args:
	        points: sequence whose first three items are the triangle vertices,
	            each an ``(x, y)`` pair.

	    Returns:
	        ``False`` as soon as one angle is larger than 150 degrees, ``True``
	        otherwise.
	    """
	    for i in range(3):
	        p1 = np.asarray(points[i], dtype=float)
	        p2 = np.asarray(points[(i + 1) % 3], dtype=float)
	        p3 = np.asarray(points[(i + 2) % 3], dtype=float)

	        a = np.linalg.norm(p3 - p2)
	        b = np.linalg.norm(p1 - p3)
	        c = np.linalg.norm(p1 - p2)

	        denominator = 2 * a * c
	        if denominator == 0:
	            # p2 coincides with a neighbour, so the angle at p2 is undefined
	            # and cannot be said to exceed the limit.
	            continue

	        # Cosine rule for the angle at p2. Rounding can push the ratio just
	        # outside [-1, 1] for (nearly) collinear points, which would make
	        # arccos return NaN and let a 180-degree "triangle" slip through.
	        cos_at_p2 = np.clip((a ** 2 + c ** 2 - b ** 2) / denominator, -1.0, 1.0)
	        angle_at_p2 = np.degrees(np.arccos(cos_at_p2))

	        if angle_at_p2 > 150:
	            return False
	    return True

	def draw_rectangle(canvas, bbox_coord, outline_color, width):
	    """Outline the box ``bbox_coord`` (left, top, right, bottom) on ``canvas``.

	    The box is passed to ``canvas.rectangle`` as two corner points together
	    with ``outline=outline_color`` and ``width=width``.
	    """
	    x0, y0, x1, y1 = bbox_coord
	    corners = [(x0, y0), (x1, y1)]
	    canvas.rectangle(corners, outline=outline_color, width=width)

	def draw_ellipse(canvas, bbox_coord, mask_polygon, outline_color, width, size_ratio=1, aspect_ratio=1.0):
	    """Outline an ellipse centred on the target region.

	    Args:
	        canvas: drawing object exposing ``ellipse(bbox, outline=..., width=...)``.
	        bbox_coord: ``(minx, miny, maxx, maxy)``; used when ``mask_polygon``
	            is ``None``.
	        mask_polygon: optional object with a ``bounds`` attribute yielding
	            ``(minx, miny, maxx, maxy)``; takes precedence over ``bbox_coord``.
	        outline_color: forwarded as ``outline``.
	        width: forwarded as the line ``width``.
	        size_ratio: scale applied to both extents of the region.
	        aspect_ratio: multiplies the horizontal extent and divides the
	            vertical extent.
	    """
	    # Identity check: `!= None` would invoke the object's own comparison.
	    if mask_polygon is not None:
	        minx, miny, maxx, maxy = mask_polygon.bounds
	    else:
	        minx, miny, maxx, maxy = bbox_coord

	    # The ellipse stays centred on the original region.
	    center_x = (maxx + minx) / 2
	    center_y = (maxy + miny) / 2

	    half_width = (maxx - minx) * size_ratio * aspect_ratio / 2
	    half_height = (maxy - miny) * size_ratio / aspect_ratio / 2

	    bbox = [
	        center_x - half_width,
	        center_y - half_height,
	        center_x + half_width,
	        center_y + half_height,
	    ]
	    canvas.ellipse(bbox, outline=outline_color, width=width)

	def draw_arrow(canvas, bbox_coord, outline_color, line_width, max_arrow_length=100):
	# Draw a randomly oriented arrow (shaft plus head) near the centre of bbox_coord.
	#
	# canvas: object exposing line() and polygon(); both are called below with
	#     point lists plus fill= / width= keyword arguments.
	# bbox_coord: unpacked as (left, top, right, bottom).
	# outline_color: passed as fill= to every drawing call.
	# line_width: passed as width= to the line() calls.
	# max_arrow_length: one of the two bounds the arrow length is sampled between.
	#
	# NOTE(review): the indentation of this function is missing in this copy of
	# the file, so the extent of some branches below cannot be determined from
	# the text alone; see the notes next to the affected lines.
	left, top, right, bottom = bbox_coord
	center_x = (left + right) / 2
	center_y = (top + bottom) / 2

	# Arrow length related to the bounding box size
	bounding_box_size_length = min(right - left, bottom - top)
	# NOTE(review): in either branch min_arrow_length receives the larger of
	# (0.8 * shorter box side, max_arrow_length). random.uniform accepts its
	# bounds in either order, so sampling still works, but the names look
	# swapped - confirm intent. Whether the max_arrow_length reassignment
	# belongs to the else-branch or runs unconditionally is also unclear here.
	if 0.8 * bounding_box_size_length > max_arrow_length:
	min_arrow_length = 0.8 * bounding_box_size_length
	else:
	min_arrow_length = max_arrow_length
	max_arrow_length = 0.8 * bounding_box_size_length
	arrow_length = random.uniform(min_arrow_length, max_arrow_length)

	# Randomize the arrow angle
	angle = random.uniform(0, 2 * math.pi)
	# Shift the start point by up to a quarter of the box extent on each axis.
	center_x += random.uniform(-0.25, 0.25) * (right - left)
	center_y += random.uniform(-0.25, 0.25) * (bottom - top)

	# Arrowhead size related to arrow length
	arrow_head_size = max(random.uniform(0.2, 0.5) * arrow_length, 6)

	# Recalculate the arrow end to ensure it connects properly with the arrowhead
	arrow_end_x = center_x + (arrow_length - arrow_head_size) * math.cos(angle)
	arrow_end_y = center_y + (arrow_length - arrow_head_size) * math.sin(angle)

	if random.random() < 0.5:
	# Draw with a "wobble" to mimic human drawing
	mid_x = (center_x + arrow_end_x) / 2 + random.uniform(-5, 5)
	mid_y = (center_y + arrow_end_y) / 2 + random.uniform(-5, 5)
	canvas.line([(center_x, center_y), (mid_x, mid_y), (arrow_end_x, arrow_end_y)],
	fill=outline_color, width=line_width)
	else:
	# Draw the arrow line
	canvas.line([(center_x, center_y), (arrow_end_x, arrow_end_y)], fill=outline_color, width=line_width)
	# The head drawn below is anchored at (arrow_end_x, arrow_end_y); these two
	# assignments move that anchor to the shaft's start point.
	# NOTE(review): it cannot be told from this copy whether they belong to the
	# else-branch only or follow the if/else - confirm against the upstream file.
	arrow_end_x = center_x
	arrow_end_y = center_y
	# Draw the arrow head
	# Two wings of length arrow_head_size at angle +/- 60 degrees from the anchor,
	# either as a filled triangle or as an open two-segment line.
	if random.random() < 0.5:
	canvas.polygon([
	(arrow_end_x + arrow_head_size * math.cos(angle + math.pi / 3),
	arrow_end_y + arrow_head_size * math.sin(angle + math.pi / 3)),
	(arrow_end_x, arrow_end_y),
	(arrow_end_x + arrow_head_size * math.cos(angle - math.pi / 3),
	arrow_end_y + arrow_head_size * math.sin(angle - math.pi / 3))
	], fill=outline_color)
	else:
	canvas.line([
	(arrow_end_x + arrow_head_size * math.cos(angle + math.pi / 3),
	arrow_end_y + arrow_head_size * math.sin(angle + math.pi / 3)),
	(arrow_end_x, arrow_end_y),
	(arrow_end_x + arrow_head_size * math.cos(angle - math.pi / 3),
	arrow_end_y + arrow_head_size * math.sin(angle - math.pi / 3))
	], fill=outline_color, width=line_width)

	def draw_rounded_triangle(canvas, bbox_coord, mask_polygon, outline_color, width):
	    """Draw a random triangle outline with curved joints inside the region.

	    Three vertices are sampled inside ``mask_polygon`` when it is given,
	    otherwise inside ``bbox_coord``. Samples are redrawn while
	    ``is_max_angle_less_than_150`` rejects them, up to a fixed number of
	    attempts; after that the last sample is drawn as is, so a degenerate
	    region cannot stall the caller.

	    Args:
	        canvas: drawing object exposing ``line(points, fill=, width=, joint=)``.
	        bbox_coord: ``(left, top, right, bottom)`` fallback region.
	        mask_polygon: optional polygon to sample the vertices from.
	        outline_color: forwarded as ``fill``.
	        width: forwarded as the line ``width``.
	    """
	    max_trials = 100
	    points = []
	    for _ in range(max_trials):
	        if mask_polygon is not None:
	            points = [get_random_point_within_polygon(mask_polygon) for _ in range(3)]
	        else:
	            # A bbox is a plain 4-tuple, so it needs the bbox sampler rather
	            # than the polygon one (which reads `.bounds`).
	            points = [get_random_point_within_bbox(bbox_coord) for _ in range(3)]
	        if is_max_angle_less_than_150(points):
	            break
	    # Close the path by repeating the first vertex.
	    canvas.line([points[0], points[1], points[2], points[0]], fill=outline_color, width=width, joint='curve')

	def draw_point(canvas, bbox_coord, mask_polygon, outline_color=(255, 0, 0), radius=3, aspect_ratio=1.0):
	    """Draw a small filled ellipse at a random location in the target region.

	    The centre is sampled from a 2-D normal distribution centred on the
	    region's bounding rectangle, with per-axis variance equal to one eighth
	    of the extent. Up to ``max_tries`` samples are tested for containment;
	    if none is accepted, one further sample is used unchecked.

	    Args:
	        canvas: drawing object exposing ``ellipse(bbox, outline=, fill=)``.
	        bbox_coord: ``(minx, miny, maxx, maxy)``; used when ``mask_polygon``
	            is ``None``.
	        mask_polygon: optional polygon (``bounds`` / ``contains``) that the
	            centre should fall inside.
	        outline_color: used for both outline and fill.
	        radius: base radius of the dot.
	        aspect_ratio: multiplies the x radius and divides the y radius.
	    """
	    if mask_polygon is not None:
	        minx, miny, maxx, maxy = mask_polygon.bounds
	    else:
	        minx, miny, maxx, maxy = bbox_coord
	    mean = [(maxx + minx) / 2, (maxy + miny) / 2]
	    cov = [[(maxx - minx) / 8, 0], [0, (maxy - miny) / 8]]

	    def _is_inside(px, py):
	        # Without a polygon there is nothing to call `.contains` on, so the
	        # bounding box itself acts as the acceptance region.
	        if mask_polygon is not None:
	            return mask_polygon.contains(Point(px, py))
	        return minx <= px <= maxx and miny <= py <= maxy

	    max_tries = 10
	    for _ in range(max_tries):
	        cx, cy = multivariate_normal.rvs(mean=mean, cov=cov)
	        if _is_inside(cx, cy):
	            break
	    else:
	        # Fail-safe: give up on containment and take one more sample.
	        cx, cy = multivariate_normal.rvs(mean=mean, cov=cov)

	    x_radius = radius * aspect_ratio
	    y_radius = radius / aspect_ratio
	    bbox = [cx - x_radius, cy - y_radius, cx + x_radius, cy + y_radius]

	    # Draw the ellipse and fill it with color
	    canvas.ellipse(bbox, outline=outline_color, fill=outline_color)

	def draw_scribble(canvas, bbox_coord, mask_polygon, outline_color=(255, 0, 0), width=3):
	    """Draw a random cubic Bezier curve ("scribble") inside the target region.

	    Four control points are sampled inside ``mask_polygon`` when it is given,
	    otherwise inside ``bbox_coord``. The curve is evaluated at 1000 evenly
	    spaced parameter values and drawn as consecutive short segments.

	    Args:
	        canvas: drawing object exposing ``line(points, fill=, width=)``.
	        bbox_coord: ``(left, top, right, bottom)`` fallback region.
	        mask_polygon: optional polygon to sample the control points from.
	        outline_color: forwarded as ``fill``.
	        width: forwarded as the line ``width``.
	    """
	    if mask_polygon is not None:
	        p0 = get_random_point_within_polygon(mask_polygon)
	        p1 = get_random_point_within_polygon(mask_polygon)
	        p2 = get_random_point_within_polygon(mask_polygon)
	        p3 = get_random_point_within_polygon(mask_polygon)
	    else:
	        p0 = get_random_point_within_bbox(bbox_coord)
	        p1 = get_random_point_within_bbox(bbox_coord)
	        p2 = get_random_point_within_bbox(bbox_coord)
	        p3 = get_random_point_within_bbox(bbox_coord)

	    prev_point = None  # Initialize prev_point outside the loop
	    for t in np.linspace(0, 1, 1000):
	        u = 1 - t
	        # Cubic Bezier: B(t) = u^3 p0 + 3 u^2 t p1 + 3 u t^2 p2 + t^3 p3
	        x = u ** 3 * p0[0] + 3 * u ** 2 * t * p1[0] + 3 * u * t ** 2 * p2[0] + t ** 3 * p3[0]
	        y = u ** 3 * p0[1] + 3 * u ** 2 * t * p1[1] + 3 * u * t ** 2 * p2[1] + t ** 3 * p3[1]

	        current_point = (x, y)
	        if prev_point is not None:
	            canvas.line([prev_point, current_point], fill=outline_color, width=width)

	        prev_point = current_point  # Update prev_point to the current ending point

	def draw_mask_contour(canvas, bbox_coord, segmentation_coords, color="red", width=1):
	    """Outline each segmentation ring, thickened by drawing shifted copies.

	    Args:
	        canvas: drawing object exposing ``polygon(points, outline=)``.
	        bbox_coord: ``(x0, y0, x1, y1)``; its rectangle is outlined when
	            ``segmentation_coords`` is ``None``.
	        segmentation_coords: iterable of flat ``[x0, y0, x1, y1, ...]`` rings,
	            or ``None``.
	        color: forwarded as ``outline``.
	        width: every ring is drawn once per integer offset ``(dx, dy)`` with
	            ``-width <= dx, dy <= width``.
	    """
	    # Identity check: `== None` is evaluated elementwise by array-like inputs
	    # and cannot be used as a condition there.
	    if segmentation_coords is None:
	        segmentation_coords = [[bbox_coord[0], bbox_coord[1], bbox_coord[0], bbox_coord[3],
	                                bbox_coord[2], bbox_coord[3], bbox_coord[2], bbox_coord[1]]]
	    offsets = range(-width, width + 1)
	    for segment in segmentation_coords:
	        coords = [(segment[i], segment[i + 1]) for i in range(0, len(segment), 2)]
	        for dx in offsets:
	            for dy in offsets:
	                shifted_coords = [(x + dx, y + dy) for x, y in coords]
	                canvas.polygon(shifted_coords, outline=color)

	def draw_mask(canvas, bbox_coord, segmentation_coords, color="red", width=1):
	    """Fill every segmentation ring on ``canvas`` with ``color``.

	    Each ring is a flat ``[x0, y0, x1, y1, ...]`` sequence that is regrouped
	    into ``(x, y)`` vertices and handed to ``canvas.polygon`` with no outline.
	    ``bbox_coord`` is accepted but not used.
	    """
	    for ring in segmentation_coords:
	        vertices = []
	        for k in range(0, len(ring), 2):
	            vertices.append((ring[k], ring[k + 1]))
	        canvas.polygon(vertices, outline=None, fill=color, width=width)



	def _polygons_from_rings(rings, pick_random):
	    """Build shapely polygons from flat ``[x0, y0, x1, y1, ...]`` rings.

	    Returns ``(mask_polygon, all_polygons_union)``. ``mask_polygon`` is a
	    random polygon when ``pick_random`` is true, otherwise the first one.
	    Either value is ``None`` when it could not be built.
	    """
	    try:
	        polygons = [
	            Polygon([(ring[i], ring[i + 1]) for i in range(0, len(ring), 2)])
	            for ring in rings
	        ]
	        mask_polygon = random.choice(polygons) if pick_random else polygons[0]
	    except Exception:
	        # Best effort: no usable polygon means callers fall back to the bbox.
	        # Both values are reset so neither is ever left undefined.
	        return None, None
	    try:
	        all_polygons_union = unary_union(polygons)
	    except Exception:
	        all_polygons_union = None
	    return mask_polygon, all_polygons_union


	def image_blending(image, shape='rectangle', bbox_coord=None, segmentation=None,
	                   ori_height=None, ori_width=None, alpha=None, rgb_value=None):
	    """Overlay one randomly styled visual prompt on ``image``.

	    Args:
	        image: PIL image; it is converted to RGBA and composited with the
	            overlay, so it must have size ``(ori_width, ori_height)``.
	        shape: one of 'rectangle', 'ellipse', 'arrow', 'triangle', 'point',
	            'scribble', 'mask_contour'.
	        bbox_coord: ``(left, top, right, bottom)`` of the target region.
	        segmentation: optional region mask, either an RLE dict (decoded with
	            pycocotools) or a list of flat ``[x0, y0, x1, y1, ...]`` rings.
	        ori_height, ori_width: size of the overlay canvas.
	        alpha: overlay opacity (0-255); sampled randomly when ``None``.
	        rgb_value: RGB tuple; ``alpha`` is appended to it to form the color.

	    Returns:
	        ``(image, visual_prompt_img)``: the composited RGB image and a uint8
	        array that is 1 where the overlay's RGB channels sum to more than 10.

	    Raises:
	        NotImplementedError: for any other ``shape``.
	    """
	    visual_prompt_img = Image.new('RGBA', (ori_width, ori_height), (0, 0, 0, 0))
	    visual_prompt_img_canvas = ImageDraw.Draw(visual_prompt_img)
	    if alpha is None:
	        alpha = random.randint(96, 255) if shape != 'mask' else random.randint(48, 128)
	    color_alpha = rgb_value + (alpha, )

	    if isinstance(segmentation, dict):
	        if isinstance(segmentation['counts'], list):
	            # convert to compressed RLE
	            segmentation = mask.frPyObjects(segmentation, ori_height, ori_width)
	        m = mask.decode(segmentation)
	        m = m.astype(np.uint8).squeeze()
	        contours, _hierarchy = cv2.findContours(m, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	        contours = [contour.flatten() for contour in contours]
	        mask_polygon, all_polygons_union = _polygons_from_rings(contours, pick_random=True)
	    elif segmentation:
	        contours = segmentation
	        mask_polygon, all_polygons_union = _polygons_from_rings(segmentation, pick_random=False)
	    else:
	        contours = None
	        all_polygons_union = None
	        mask_polygon = None

	    # NOTE(review): 'mask' gets its own alpha range above but has no branch
	    # below, so it currently raises NotImplementedError - confirm whether
	    # draw_mask was meant to be wired in.
	    if shape == 'rectangle':
	        line_width = random.choice([2, 3, 4, 5, 6, 7, 8])
	        draw_rectangle(visual_prompt_img_canvas, bbox_coord, color_alpha, line_width)
	    elif shape == 'ellipse':
	        line_width = random.choice([2, 3, 4, 5, 6, 7, 8])
	        size_ratio = random.uniform(1, 1.5)
	        draw_ellipse(visual_prompt_img_canvas, bbox_coord, all_polygons_union,
	                     color_alpha, line_width, size_ratio=size_ratio)
	    elif shape == 'arrow':
	        line_width = random.choice([1, 2, 3, 4, 5, 6])
	        max_arrow_length = 50
	        draw_arrow(visual_prompt_img_canvas, bbox_coord, color_alpha, line_width, max_arrow_length)
	    elif shape == 'triangle':
	        line_width = random.choice([2, 3, 4, 5, 6, 7, 8])
	        draw_rounded_triangle(visual_prompt_img_canvas, bbox_coord, all_polygons_union, color_alpha, line_width)
	    elif shape == 'point':
	        radius = random.choice(list(range(3, 10)))
	        aspect_ratio = 1 if random.random() < 0.5 else random.uniform(0.5, 2.0)
	        draw_point(visual_prompt_img_canvas, bbox_coord, mask_polygon, color_alpha, radius, aspect_ratio)
	    elif shape == 'scribble':
	        line_width = random.choice(list(range(2, 13)))
	        draw_scribble(visual_prompt_img_canvas, bbox_coord, mask_polygon, color_alpha, line_width)
	    elif shape == 'mask_contour':
	        line_width = random.choice([1, 2, 3, 4])
	        draw_mask_contour(visual_prompt_img_canvas, bbox_coord, contours, color_alpha, line_width)
	    else:
	        raise NotImplementedError

	    image = image.convert('RGBA')
	    image = Image.alpha_composite(image, visual_prompt_img)
	    image = image.convert('RGB')

	    # Binary footprint of the overlay, derived from its RGB channels only.
	    visual_prompt_img = np.array(visual_prompt_img.convert('RGB'))
	    visual_prompt_img = np.uint8(np.sum(visual_prompt_img, axis=-1) > 10)

	    return image, visual_prompt_img


	def point_rendering(points, colors, ori_height, ori_width):
	    """Render groups of points as filled ellipses, one color per group.

	    Radius, aspect ratio and opacity are sampled once and shared by every
	    group.

	    Args:
	        points: sequence of groups; each group is a sequence of points whose
	            first two items are read as ``(cx, cy)``.
	        colors: per-group colors with channels in ``[0, 1]``. A group whose
	            scaled color is pure black uses ``colors[-1]`` instead.
	        ori_height, ori_width: canvas size.

	    Returns:
	        ``(_regions, merged_visual_prompts)``: a uint8 array of shape
	        ``(n, ori_height, ori_width)`` with one binary footprint per group
	        (``n == 0`` for empty input), and an RGB PIL image with all groups
	        composited onto a black background.
	    """
	    merged_visual_prompts = Image.new('RGB', (ori_width, ori_height), (0, 0, 0)).convert('RGBA')
	    radius = random.choice(list(range(3, 11)))
	    aspect_ratio = 1 if random.random() < 0.5 else random.uniform(0.5, 2.0)
	    alpha = random.randint(96, 255)

	    # Same convention as draw_point: the ratio widens x and narrows y.
	    x_radius = radius * aspect_ratio
	    y_radius = radius / aspect_ratio

	    _regions = []
	    for i, point in enumerate(points):
	        vprompt_img = Image.new('RGBA', (ori_width, ori_height), (0, 0, 0, 0))
	        canvas = ImageDraw.Draw(vprompt_img)
	        color = (int(colors[i][0] * 255), int(colors[i][1] * 255), int(colors[i][2] * 255))
	        if color == (0, 0, 0):
	            # Black would be indistinguishable from the empty canvas in the
	            # footprint computed below.
	            color = (int(colors[-1][0] * 255), int(colors[-1][1] * 255), int(colors[-1][2] * 255))
	        color_alpha = color + (alpha, )
	        for _point in point:
	            cx, cy = _point[0], _point[1]
	            bbox = [cx - x_radius, cy - y_radius, cx + x_radius, cy + y_radius]
	            canvas.ellipse(bbox, outline=color_alpha, fill=color_alpha)
	        # The background is opaque, so the composite stays opaque and can be
	        # kept in RGBA until the end.
	        merged_visual_prompts = Image.alpha_composite(merged_visual_prompts, vprompt_img)

	        footprint = np.array(vprompt_img.convert('RGB'))
	        _regions.append(np.uint8(np.sum(footprint, axis=-1) > 10))

	    merged_visual_prompts = merged_visual_prompts.convert('RGB')
	    if not _regions:
	        # np.stack rejects an empty list; return an empty (0, h, w) stack.
	        return np.zeros((0, ori_height, ori_width), dtype=np.uint8), merged_visual_prompts
	    _regions = np.stack(_regions, axis=0)  # n, h, w

	    return _regions, merged_visual_prompts

	def box_rendering(boxes, colors, ori_height, ori_width):
	    """Render boxes as rectangle outlines, one color per box.

	    Opacity and line width are sampled once and shared by every box.

	    Args:
	        boxes: sequence of ``(left, top, right, bottom)`` boxes.
	        colors: per-box colors with channels in ``[0, 1]``. A box whose
	            scaled color is pure black uses ``colors[-1]`` instead.
	        ori_height, ori_width: canvas size.

	    Returns:
	        ``(_regions, merged_visual_prompts)``: a uint8 array of shape
	        ``(n, ori_height, ori_width)`` with one binary footprint per box
	        (``n == 0`` for empty input), and an RGB PIL image with all boxes
	        composited onto a black background.
	    """
	    merged_visual_prompts = Image.new('RGB', (ori_width, ori_height), (0, 0, 0)).convert('RGBA')
	    alpha = random.randint(96, 255)
	    line_width = random.choice([2, 3, 4, 5, 6, 7,])

	    _regions = []
	    for i, box in enumerate(boxes):
	        vprompt_img = Image.new('RGBA', (ori_width, ori_height), (0, 0, 0, 0))
	        canvas = ImageDraw.Draw(vprompt_img)
	        color = (int(colors[i][0] * 255), int(colors[i][1] * 255), int(colors[i][2] * 255))
	        if color == (0, 0, 0):
	            # Black would be indistinguishable from the empty canvas in the
	            # footprint computed below.
	            color = (int(colors[-1][0] * 255), int(colors[-1][1] * 255), int(colors[-1][2] * 255))
	        color_alpha = color + (alpha, )

	        left, top, right, bottom = box
	        canvas.rectangle([(left, top), (right, bottom)], outline=color_alpha, width=line_width)

	        # The background is opaque, so the composite stays opaque and can be
	        # kept in RGBA until the end.
	        merged_visual_prompts = Image.alpha_composite(merged_visual_prompts, vprompt_img)

	        footprint = np.array(vprompt_img.convert('RGB'))
	        _regions.append(np.uint8(np.sum(footprint, axis=-1) > 10))

	    merged_visual_prompts = merged_visual_prompts.convert('RGB')
	    if not _regions:
	        # np.stack rejects an empty list; return an empty (0, h, w) stack.
	        return np.zeros((0, ori_height, ori_width), dtype=np.uint8), merged_visual_prompts
	    _regions = np.stack(_regions, axis=0)  # n, h, w

	    return _regions, merged_visual_prompts

	from distinctipy import distinctipy
	def contour_rendering(image, masks, mask_ids=None):
	    """Draw mask contours and numeric labels onto ``image`` in place.

	    For every mask the contours are drawn in a distinct color and a label is
	    placed at the centroid of the largest contour. All labels share the
	    smallest font scale chosen for any single mask. Masks without any
	    contour are skipped.

	    Args:
	        image: array drawn on with OpenCV calls (modified in place).
	        masks: sequence of single-channel masks accepted by
	            ``cv2.findContours``.
	        mask_ids: optional labels, indexed like ``masks``; defaults to
	            1-based positions.

	    Returns:
	        None.
	    """
	    if len(masks) == 0:
	        return None

	    colors = distinctipy.get_colors(len(masks) + 1)
	    font = cv2.FONT_HERSHEY_SIMPLEX
	    text_thickness = 2
	    font_scale_list = []
	    label_list = []
	    color_list = []
	    label_loc_list = []
	    thickness_list = []
	    # `binary_mask` rather than `mask`, which is also the name of the
	    # pycocotools module imported at file level.
	    for anno_i, binary_mask in enumerate(masks):
	        contours, _hierarchy = cv2.findContours(binary_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
	        if len(contours) == 0:
	            # Nothing to outline or label for an empty mask.
	            continue

	        # Near-white colors are swapped for the spare last color. Channels
	        # are emitted in reversed order (presumably BGR for OpenCV - confirm).
	        picked = colors[anno_i]
	        if picked[0] > 0.9 and picked[1] > 0.9 and picked[2] > 0.9:
	            picked = colors[-1]
	        color_anno_i = (picked[2] * 255, picked[1] * 255, picked[0] * 255)

	        cv2.drawContours(image, contours, -1, color=color_anno_i, thickness=2)

	        cnt_area = []
	        cnt_centroid = []
	        cnt_bbox = []
	        for cnt in contours:
	            cnt_area.append(cv2.contourArea(cnt))
	            M = cv2.moments(cnt)
	            x, y, w, h = cv2.boundingRect(cnt)
	            if M["m00"] > 0:
	                cx = int(M["m10"] / M["m00"])
	                cy = int(M["m01"] / M["m00"])
	            else:
	                # Zero-area contour: fall back to the bounding-box centre.
	                cx, cy = x + w/2, y + h/2
	            cnt_centroid.append((cx, cy))
	            cnt_bbox.append((w, h))

	        # The label goes on the largest contour.
	        select_cnt = int(np.argmax(np.array(cnt_area)))
	        select_centroid = cnt_centroid[select_cnt]
	        visual_prompt_id = anno_i+1 if mask_ids is None else mask_ids[anno_i]
	        boxW, boxH = cnt_bbox[select_cnt]
	        thickness = 1 if max(boxH, boxW) < 25 else text_thickness

	        # Largest scale (5.9 down to 0.5) whose text is at most 15% of the
	        # contour's bounding-box width and height; 0.5 if none qualifies.
	        for scale in reversed(range(5, 60, 1)):
	            textSize = cv2.getTextSize(f"{visual_prompt_id}", font, scale/10, thickness)
	            textW, textH = textSize[0][0], textSize[0][1]
	            if textH / boxH > 0.15 or textW / boxW > 0.15:
	                continue
	            font_scale_list.append(scale/10)
	            break
	        else:
	            font_scale_list.append(0.5)
	        label_list.append(visual_prompt_id)
	        color_list.append(color_anno_i)
	        thickness_list.append(thickness)

	        (base_w, base_h), bottom = cv2.getTextSize(f"{visual_prompt_id}", font, font_scale_list[-1], thickness)
	        label_loc_list.append((
	            int(select_centroid[0] - base_w/2),
	            int(select_centroid[1] + (base_h+bottom)/2)
	        ))

	    if not label_list:
	        return None

	    font_scale = min(font_scale_list)
	    for label, label_color, label_loc, thickness in zip(label_list, color_list, label_loc_list, thickness_list):
	        # Each label keeps the thickness chosen for its own mask.
	        (base_w, base_h), bottom = cv2.getTextSize(f"{label}", font, font_scale, thickness)
	        cv2.rectangle(image, (label_loc[0], int(label_loc[1]-base_h-bottom/2)),
	                      (label_loc[0]+base_w, int(label_loc[1]+bottom/2)),
	                      label_color, -1, 8)
	        cv2.putText(image, f"{label}", label_loc, font, font_scale,
	                    (255, 255, 255), thickness)

	    return None