Spaces:

xichenhku
/

MimicBrush

Running on Zero

App Files Files Community

MimicBrush / data_utils.py

xichenhku

Upload 162 files

81d8e7c verified 5 months ago

raw

history blame

16.5 kB

	import numpy as np
	import torch
	import cv2
	import random
	from PIL import Image


	def gaussian_blure(img, intens = 5):
		"""Apply a Gaussian blur to an image and return it as a PIL image.

		:param img: input image; converted with ``np.array`` and cast to uint8.
		:param intens: sigma handed to ``cv2.GaussianBlur`` (e.g. 5, 10, 15, 20).
		:return: the blurred image as a ``PIL.Image.Image``.
		"""
		pixels = np.array(img).astype(np.uint8)
		# A (0, 0) kernel size lets OpenCV derive the kernel extent from sigma.
		blurred = cv2.GaussianBlur(pixels, (0, 0), intens)
		return Image.fromarray(blurred)

	def random_mask(mask):
		"""Build a binary mask holding one randomly placed rectangle of ones.

		The rectangle covers 40%-90% of the width and of the height of ``mask``
		(each side drawn independently). The result has the shape of ``mask``
		and dtype uint8; the values stored in ``mask`` are not used.
		"""
		height, width = mask.shape[0], mask.shape[1]
		canvas = np.zeros_like(mask)
		# Draw order (width, height, top, left) is kept so seeded runs reproduce.
		rect_w = int(random.uniform(0.4, 0.9) * width)
		rect_h = int(random.uniform(0.4, 0.9) * height)
		top = random.randint(0, height - rect_h)
		left = random.randint(0, width - rect_w)
		canvas[top:top + rect_h, left:left + rect_w] = 1
		return canvas.astype(np.uint8)

	'''
	def random_mask_grid(mask, p=0.50):
	# 创建一个 h x w 的全零数组，作为初始掩膜
	h,w = mask.shape[0],mask.shape[1]
	mask = np.zeros((h, w), dtype=np.uint8)
	n = random.choice([3,4,5,6,7,8,9,10])

	# 计算小块的大小
	block_h = h // n
	block_w = w // n

	# 在每个小块中以概率 p 设置为 1
	for i in range(n):
	for j in range(n):
	if np.random.rand() < p:
	mask[i*block_h:(i+1)*block_h, j*block_w:(j+1)*block_w] = 1
	return mask
	'''

	def get_SIFT(image):
		"""Detect ORB keypoints and return their integer (row, col) positions.

		Despite the name, the detector created here is ORB (nfeatures=200,
		edgeThreshold=50), not SIFT. The descriptors are computed but discarded.
		"""
		detector = cv2.ORB_create(nfeatures=200, edgeThreshold=50)
		keypoints, _descriptors = detector.detectAndCompute(image, None)
		coordinates = []
		for kp in keypoints:
			# kp.pt is (x, y); the output is ordered (y, x).
			coordinates.append((int(kp.pt[1]), int(kp.pt[0])))
		return coordinates


	'''
	def random_mask_grid(mask, points_list, p=0.0):
	# 创建一个 h x w 的全零数组，作为初始掩膜
	h, w = mask.shape[:2]
	mask = np.zeros((h, w), dtype=np.uint8)
	n = random.choice([3,4,5,6,7,8,9,10])

	# 计算小块的大小
	block_h = h // n
	block_w = w // n

	# 统计每个小块内的点个数
	block_counts = np.zeros((n, n), dtype=np.int32)
	for point in points_list:
	y, x = point
	i = min(y // block_h, n-1)
	j = min(x // block_w, n-1)
	block_counts[i, j] += 1

	# 找出包含点最多的前5个小块
	top5_blocks = np.argpartition(-block_counts.flatten(), 5)[:5]

	# 将这些小块对应的像素设为1
	for idx in top5_blocks:
	i, j = divmod(idx, n)
	mask[i*block_h:(i+1)*block_h, j*block_w:(j+1)*block_w] = 1

	# 在其他小块中按照概率p设置为1
	for i in range(n):
	for j in range(n):
	if (i*n + j) not in top5_blocks and np.random.rand() < p:
	mask[i*block_h:(i+1)*block_h, j*block_w:(j+1)*block_w] = 1

	return mask
	'''

	def random_mask_grid(mask, points_list, p=0.50, top5_p=0.70, other_p=0.30):
		"""Build a random block-wise mask that favours point-dense grid cells.

		The plane is split into an n x n grid (n drawn from 3..10). The five cells
		holding the most points are each switched on with probability ``top5_p``;
		every remaining cell is switched on with probability ``other_p``.

		:param mask: array; only its first two dimensions (h, w) are read.
		:param points_list: iterable of (row, col) integer positions.
		:param p: unused; kept so existing callers keep working.
		:param top5_p: probability of enabling each of the five densest cells.
		:param other_p: probability of enabling each of the other cells.
		:return: uint8 array of shape (h, w) with values 0/1. Pixels beyond
			n * block size (the remainder of the integer division) stay 0.
		"""
		h, w = mask.shape[:2]
		grid = np.zeros((h, w), dtype=np.uint8)
		n = random.choice([3,4,5,6,7,8,9,10])

		# Clamp to 1 so inputs smaller than the grid cannot divide by zero below.
		block_h = max(1, h // n)
		block_w = max(1, w // n)

		# Count how many points fall into each cell.
		block_counts = np.zeros((n, n), dtype=np.int32)
		for y, x in points_list:
			i = min(y // block_h, n-1)
			j = min(x // block_w, n-1)
			block_counts[i, j] += 1

		# Flattened indices of the five most populated cells (n >= 3, so at
		# least nine cells exist and kth=5 is always valid).
		top5_blocks = np.argpartition(-block_counts.flatten(), 5)[:5]

		for idx in top5_blocks:
			i, j = divmod(idx, n)
			if np.random.rand() < top5_p:
				grid[i*block_h:(i+1)*block_h, j*block_w:(j+1)*block_w] = 1

		# The membership test comes first so that, as before, a random number
		# is only drawn for cells outside the top five.
		top5 = set(top5_blocks.tolist())
		for i in range(n):
			for j in range(n):
				if (i*n + j) not in top5 and np.random.rand() < other_p:
					grid[i*block_h:(i+1)*block_h, j*block_w:(j+1)*block_w] = 1

		return grid

	def random_perspective_transform(image, intensity):
		"""Warp an image with a random perspective transform over a white background.

		:param image: input image array to transform.
		:param intensity: strength of the warp, from 0 to 1; each corner is moved
			by up to 0.4 * intensity of the image width / height.
		:return: the warped image (same width and height); pixels not covered
			by the warped content are set to 255.
		"""
		height, width = image.shape[:2]
		max_dx = width * 0.4 * intensity
		max_dy = height * 0.4 * intensity

		def jitter_x():
			return random.uniform(-max_dx, max_dx)

		def jitter_y():
			return random.uniform(-max_dy, max_dy)

		# Randomly displaced targets for the four image corners. The list is
		# evaluated left to right, so the random draws happen in corner order.
		dst_points = np.float32([
			[jitter_x(), jitter_y()],
			[width - jitter_x(), jitter_y()],
			[jitter_x(), height - jitter_y()],
			[width - jitter_x(), height - jitter_y()],
		])
		src_points = np.float32([[0, 0], [width, 0], [0, height], [width, height]])
		M = cv2.getPerspectiveTransform(src_points, dst_points)

		warped = cv2.warpPerspective(image, M, (width, height))
		# Warping an all-ones image marks which output pixels received content.
		coverage = cv2.warpPerspective(np.ones_like(warped), M, (width, height)) > 0.5

		# Shrink the covered area with a 5x5 erosion before compositing.
		kernel = np.ones((5, 5), np.uint8)
		coverage = cv2.erode(coverage.astype(np.uint8), kernel, iterations=1).astype(np.uint8)

		background = np.ones_like(warped) * 255
		return warped * coverage + background * (1 - coverage)




	def mask_score(mask):
		'''Score a mask by how much of it lies in a single connected region.

		The score is the area of the largest external contour divided by the
		summed area of all external contours, so a single blob scores 1.0.
		Returns 0 when the mask sums to less than 10 or when the contours
		enclose no area at all.
		'''
		mask = mask.astype(np.uint8)
		if mask.sum() < 10:
			return 0
		contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
		cnt_area = [cv2.contourArea(cnt) for cnt in contours]
		total_area = sum(cnt_area)
		# Thin structures (e.g. one-pixel-wide lines) produce contours with zero
		# area; without this guard the ratio below would be 0/0 = NaN.
		if total_area <= 0:
			return 0
		return np.max(cnt_area) / total_area


	def sobel(img, mask, thresh = 50):
		'''Compute a high-frequency (edge) map of ``img`` restricted to ``mask``.

		Both inputs are resized to 256x256 and the mask is eroded twice with a
		5x5 kernel. Sobel gradients in x and y are averaged, reduced with a max
		over the last axis, masked, and values below ``thresh`` are zeroed. The
		result weights the resized image and is resized back to the input size.
		'''
		orig_h, orig_w = img.shape[0], img.shape[1]
		small = cv2.resize(img,(256,256))
		region = (cv2.resize(mask,(256,256)) > 0.5).astype(np.uint8)
		region = cv2.erode(region, np.ones((5,5),np.uint8), iterations = 2)

		grad_x = cv2.convertScaleAbs(cv2.Sobel(small, cv2.CV_64F, 1, 0, ksize=3))
		grad_y = cv2.convertScaleAbs(cv2.Sobel(small, cv2.CV_64F, 0, 1, ksize=3))
		edges = cv2.addWeighted(grad_x, 0.5, grad_y, 0.5, 0)
		edges = np.max(edges,-1) * region

		edges[edges < thresh] = 0.0
		edges = np.stack([edges] * 3,-1)
		# Use the edge strength (scaled to 0..1) as a per-pixel weight on the image.
		edges = (edges.astype(np.float32)/255 * small.astype(np.float32) ).astype(np.uint8)
		return cv2.resize(edges,(orig_w,orig_h))


	def resize_and_pad(image, box):
		'''Fit an image into a (y1, y2, x1, x2) box region, keeping its aspect ratio.

		The image is scaled to touch the box along one dimension and padded
		with 255, centred, along the other.
		'''
		y1,y2,x1,x2 = box
		box_h, box_w = y2-y1, x2-x1
		img_h, img_w = image.shape[0], image.shape[1]
		if box_w / box_h >= img_w / img_h:
			# The box is relatively wider: match heights, pad left and right.
			new_w = int(img_w * box_h / img_h)
			resized = cv2.resize(image, (new_w, box_h))
			left = (box_w - new_w) // 2
			padding = ((0,0),(left, box_w - new_w - left),(0,0))
		else:
			# The box is relatively taller: match widths, pad top and bottom.
			new_h = int(img_h * box_w / img_w)
			resized = cv2.resize(image, (box_w, new_h))
			top = (box_h - new_h) // 2
			padding = ((top, box_h - new_h - top),(0,0),(0,0))
		return np.pad(resized, padding, 'constant', constant_values=255)



	def expand_image_mask(image, mask, ratio=1.4, random = False):
		"""Enlarge the canvas of an image and its mask by ``ratio`` via padding.

		The image is padded with 255 and the mask with 0.

		:param image: array padded along its first two of three axes.
		:param mask: two-dimensional array padded the same way.
		:param ratio: factor applied to height and width (truncated to int).
		:param random: if True the original content is placed at a random
			offset inside the new canvas, otherwise it is centred.
		:return: tuple ``(image, mask)`` of the padded arrays.
		"""
		h,w = image.shape[0], image.shape[1]
		H,W = int(h * ratio), int(w * ratio)
		extra_h, extra_w = H - h, W - w
		if random:
			# np.random.randint(0, 0) raises, so use offset 0 when the canvas
			# did not grow along that axis (tiny inputs or ratio close to 1).
			h1 = np.random.randint(0, extra_h) if extra_h > 0 else 0
			w1 = np.random.randint(0, extra_w) if extra_w > 0 else 0
		else:
			h1 = int(extra_h // 2)
			w1 = int(extra_w // 2)
		h2 = extra_h - h1
		w2 = extra_w - w1
		pad_param_image = ((h1,h2),(w1,w2),(0,0))
		pad_param_mask = ((h1,h2),(w1,w2))
		image = np.pad(image, pad_param_image, 'constant', constant_values=255)
		mask = np.pad(mask, pad_param_mask, 'constant', constant_values=0)
		return image, mask


	def resize_box(yyxx, H,W,h,w):
		"""Rescale a (y1, y2, x1, x2) box from an H x W image to an h x w image.

		Coordinates are truncated to int and capped at ``h`` (rows) / ``w`` (cols).
		"""
		top, bottom, left, right = yyxx
		scaled_rows = [int(v / H * h) for v in (top, bottom)]
		scaled_cols = [int(v / W * w) for v in (left, right)]
		y1, y2 = (min(v, h) for v in scaled_rows)
		x1, x2 = (min(v, w) for v in scaled_cols)
		return (y1,y2,x1,x2)


	def get_bbox_from_mask(mask):
		"""Return the (y1, y2, x1, x2) bounding box of the non-zero mask pixels.

		When the mask sums to less than 10 the full extent ``(0, h, 0, w)`` is
		returned. Otherwise y2 / x2 are the indices of the last non-zero row /
		column (inclusive).
		"""
		h, w = mask.shape[0], mask.shape[1]
		if mask.sum() < 10:
			return 0,h,0,w
		row_idx = np.where(np.any(mask, axis=1))[0]
		col_idx = np.where(np.any(mask, axis=0))[0]
		return (row_idx[0], row_idx[-1], col_idx[0], col_idx[-1])


	def expand_bbox(mask,yyxx,ratio=(1.2,2.0), min_crop=0):
		"""Grow a (y1, y2, x1, x2) box around its centre by a random factor.

		:param mask: array whose first two dimensions bound the result.
		:param yyxx: box as (y1, y2, x1, x2); its size is taken as
			``y2 - y1 + 1`` by ``x2 - x1 + 1``.
		:param ratio: (low, high) range of the scale factor, sampled in steps
			of 0.1 from [low, high). If the range holds no step (for example
			low == high) the factor is ``ratio[0]``.
		:param min_crop: lower bound on the expanded height and width.
		:return: expanded box (y1, y2, x1, x2), clipped to the mask extent.
		"""
		y1,y2,x1,x2 = yyxx
		# randint needs low < high; a degenerate range means "use a fixed ratio".
		low, high = int(ratio[0] * 10), int(ratio[1] * 10)
		if high > low:
			scale = np.random.randint(low, high) / 10
		else:
			scale = ratio[0]
		H,W = mask.shape[0], mask.shape[1]
		xc, yc = 0.5 * (x1 + x2), 0.5 * (y1 + y2)
		h = max(scale * (y2-y1+1), min_crop)
		w = max(scale * (x2-x1+1), min_crop)

		x1 = max(0, int(xc - w * 0.5))
		x2 = min(W, int(xc + w * 0.5))
		y1 = max(0, int(yc - h * 0.5))
		y2 = min(H, int(yc + h * 0.5))
		return (y1,y2,x1,x2)


	def box2squre(image, box):
		"""Widen the shorter side of a (y1, y2, x1, x2) box to make it square.

		The shorter side is re-centred and set to the length of the longer one;
		the result is then clipped to the image, so it may end up non-square
		near the borders.
		"""
		img_h, img_w = image.shape[0], image.shape[1]
		top, bottom, left, right = box
		box_h, box_w = bottom - top, right - left
		if box_h >= box_w:
			center_x = (left + right) // 2
			half = box_h // 2
			left, right = center_x - half, center_x + half
		else:
			center_y = (top + bottom) // 2
			half = box_w // 2
			top, bottom = center_y - half, center_y + half
		return (max(0, top), min(img_h, bottom), max(0, left), min(img_w, right))


	def pad_to_square(image, pad_value = 255, random = False):
		"""Pad the shorter spatial side of a three-axis image to make it square.

		:param image: array of shape (H, W, C); returned unchanged if H == W.
		:param pad_value: constant used for the padding.
		:param random: if True the padding is split at a random position,
			otherwise it is split evenly (the smaller half first).
		:return: the padded, square image.
		"""
		H,W = image.shape[0], image.shape[1]
		if H == W:
			return image

		gap = abs(H - W)
		before = int(np.random.randint(0,gap)) if random else int(gap / 2)
		side = (before, gap - before)

		# Tall images get extra columns, wide images get extra rows.
		pad_param = ((0,0), side, (0,0)) if H > W else (side, (0,0), (0,0))
		return np.pad(image, pad_param, 'constant', constant_values=pad_value)



	def box_in_box(small_box, big_box):
		"""Express ``small_box`` relative to the top-left corner of ``big_box``.

		Both boxes are (y1, y2, x1, x2); the result uses the same layout.
		"""
		inner_y1, inner_y2, inner_x1, inner_x2 = small_box
		origin_y, _, origin_x, _ = big_box
		return (
			inner_y1 - origin_y,
			inner_y2 - origin_y,
			inner_x1 - origin_x,
			inner_x2 - origin_x,
		)



	def shuffle_image(image, N):
		"""Split an image into an N x N grid and return it with the cells shuffled.

		:param image: image array; cells are cut from its first two axes and
			written into a three-channel uint8 canvas of the same height/width.
		:param N: number of cells per side.
		:return: uint8 array of shape (height, width, 3). Rows / columns left
			over by the integer division ``height // N`` / ``width // N`` stay 0.
		"""
		height, width = image.shape[:2]
		block_height = height // N
		block_width = width // N

		def cell(i, j):
			# Index of the (i, j) grid cell as a pair of slices.
			return (
				slice(i*block_height, (i+1)*block_height),
				slice(j*block_width, (j+1)*block_width),
			)

		blocks = [image[cell(i, j)] for i in range(N) for j in range(N)]
		np.random.shuffle(blocks)

		shuffled_image = np.zeros((height, width, 3), dtype=np.uint8)
		for idx, block in enumerate(blocks):
			i, j = divmod(idx, N)
			shuffled_image[cell(i, j)] = block
		return shuffled_image


	def get_mosaic_mask(image, fg_mask, N=16, ratio = 0.5):
		"""Replace a random subset of grid cells of an image with a noised copy.

		The image plane is split into an N x N grid and ``int(N * N * ratio)``
		cells are picked without replacement. Picked cells, and every pixel
		where ``fg_mask`` is 0, take their values from ``q_x(image)``; the
		other pixels keep the original image values.

		:param image: image array (three channels, matching the stacked mask).
		:param fg_mask: array multiplied into the (height, width) keep-mask;
			presumably a binary foreground mask - confirm with callers.
		:param N: number of grid cells per side.
		:param ratio: fraction of cells to replace.
		:return: the composited image.
		"""
		masked_number = int(N * N * ratio)
		masked_id = np.random.choice(list(range(N * N)), masked_number, replace=False)
		# A set makes the per-cell lookup O(1) instead of scanning the array.
		masked = set(masked_id.tolist())

		height, width = image.shape[:2]
		mask = np.ones((height, width))

		block_height = height // N
		block_width = width // N

		for b_id in range(N * N):
			if b_id in masked:
				i, j = divmod(b_id, N)
				mask[i*block_height:(i+1)*block_height, j*block_width:(j+1)*block_width] = 0

		mask = mask * fg_mask
		mask3 = np.stack([mask,mask,mask],-1).copy().astype(np.uint8)
		noise = q_x(image)
		noise_mask = image * mask3 + noise * (1-mask3)
		return noise_mask

	def extract_canney_noise(image, mask, dilate=True):
		"""Keep image pixels near Canny edges inside the mask; set the rest to 255.

		:param image: image array passed to ``cv2.Canny``.
		:param mask: mask array, resized to the image size and thresholded at 0.5.
		:param dilate: unused.
		:return: array equal to ``image`` where the dilated edge map is set and
			255 elsewhere.
		"""
		h,w = image.shape[0],image.shape[1]
		kernel = np.ones((8, 8), dtype=np.uint8)

		region = cv2.resize(mask.astype(np.uint8),(w,h)) > 0.5
		# NOTE(review): the third positional argument of cv2.erode / cv2.dilate
		# is `dst`, not `iterations`, so the 10 and 5 below presumably do not
		# set an iteration count - confirm the intent before changing them.
		region = cv2.erode(region.astype(np.uint8), kernel, 10)

		edges = cv2.Canny(image, 50,100) * region
		edge_band = (cv2.dilate(edges, kernel, 5) > 128).astype(np.uint8)
		edge_band = np.stack([edge_band] * 3,-1)

		# The q_x output is multiplied by 0, so the fill value is a constant 255.
		fill = q_x(image, t=1) * 0 + 255
		return edge_band * image + (1-edge_band) * fill


	def get_random_structure(size):
		"""Return a randomly chosen OpenCV structuring element.

		One of four shapes is picked uniformly: a ``size`` x ``size`` rectangle,
		a ``size`` x ``size`` ellipse, or an ellipse with one of its two
		dimensions halved (``size // 2``).
		"""
		choice = np.random.randint(1, 5)
		half = size // 2
		options = {
			1: (cv2.MORPH_RECT, (size, size)),
			2: (cv2.MORPH_ELLIPSE, (size, size)),
			3: (cv2.MORPH_ELLIPSE, (size, half)),
			4: (cv2.MORPH_ELLIPSE, (half, size)),
		}
		shape, ksize = options[choice]
		return cv2.getStructuringElement(shape, ksize)

	def random_dilate(seg, min=3, max=10):
		"""Dilate ``seg`` once with a random structuring element.

		The element size is drawn with ``np.random.randint(min, max)`` and its
		shape comes from ``get_random_structure``.
		"""
		kernel = get_random_structure(np.random.randint(min, max))
		return cv2.dilate(seg,kernel,iterations = 1)

	def random_erode(seg, min=3, max=10):
		"""Erode ``seg`` once with a random structuring element.

		The element size is drawn with ``np.random.randint(min, max)`` and its
		shape comes from ``get_random_structure``.
		"""
		kernel = get_random_structure(np.random.randint(min, max))
		return cv2.erode(seg,kernel,iterations = 1)

	def compute_iou(seg, gt):
		"""Return the intersection-over-union of the non-zero pixels of two masks.

		Pixels count as set when they are non-zero. The masks are compared as
		booleans rather than multiplied / added, because uint8 arithmetic wraps
		around (e.g. 255 + 1 == 0) and would miscount pixels. A small epsilon
		keeps the ratio defined (1.0) when both masks are empty.
		"""
		seg_on = np.asarray(seg) != 0
		gt_on = np.asarray(gt) != 0
		intersection = np.count_nonzero(seg_on & gt_on)
		union = np.count_nonzero(seg_on | gt_on)
		return (intersection + 1e-6) / (union + 1e-6)


	def select_max_region(mask):
		"""Keep only the largest 8-connected foreground component of a mask.

		:param mask: single-channel 8-bit mask accepted by
			``cv2.connectedComponentsWithStats``.
		:return: uint8 array (values 0/1) marking the component with the
			largest area; all zeros when the mask has no foreground.
		"""
		nums, labels, stats, centroids = cv2.connectedComponentsWithStats(mask, connectivity=8)
		# OpenCV always assigns label 0 to the background, so it can be skipped
		# by position. Guessing the background from a bounding box that starts
		# at (0, 0) mislabels components that touch the top-left corner.
		if nums <= 1:
			return np.zeros_like(labels, dtype=np.uint8)
		largest = 1 + stats[1:, cv2.CC_STAT_AREA].argmax()
		return (labels == largest).astype(np.uint8)



	def perturb_mask(gt, min_iou = 0.3, max_iou = 0.99):
		"""Randomly distort a ground-truth mask until its IoU with it drops below a target.

		A target IoU is drawn uniformly from ``[min_iou, max_iou)``. Up to 250
		rounds are run; each round applies four random dilate / erode steps on
		random sub-rectangles, occasionally toggling a single pixel first. After
		every round the result is OR-ed with ``gt`` (so it always contains
		``gt``), and the loop stops once ``compute_iou`` falls below the target.

		:param gt: two-dimensional mask; cast to uint8.
		:param min_iou: lower bound of the sampled target IoU.
		:param max_iou: upper bound of the sampled target IoU.
		:return: uint8 mask; the result of ``select_max_region`` on the
			distorted mask, or a plain copy of ``gt`` when it is at most
			2 pixels high or wide.
		"""
		iou_target = np.random.uniform(min_iou, max_iou)
		h, w = gt.shape
		gt = gt.astype(np.uint8)
		seg = gt.copy()

		# Rare case
		if h <= 2 or w <= 2:
			print('GT too small, returning original')
			return seg

		# Do a bunch of random operations
		for _ in range(250):
			for _ in range(4):
				# Random sub-rectangle: (lx, ly) is its top-left corner and
				# (lw, lh) its exclusive bottom-right corner.
				lx, ly = np.random.randint(w), np.random.randint(h)
				lw, lh = np.random.randint(lx+1,w+1), np.random.randint(ly+1,h+1)

				# Randomly set one pixel to 1/0. With the following dilate/erode, we can create holes/external regions
				if np.random.rand() < 0.1:
					cx = int((lx + lw) / 2)
					cy = int((ly + lh) / 2)
					seg[cy, cx] = np.random.randint(2) * 255

				# Dilate/erode
				if np.random.rand() < 0.5:
					seg[ly:lh, lx:lw] = random_dilate(seg[ly:lh, lx:lw])
				else:
					seg[ly:lh, lx:lw] = random_erode(seg[ly:lh, lx:lw])

			# Re-add the ground truth so the perturbed mask never loses it.
			seg = np.logical_or(seg, gt).astype(np.uint8)
			#seg = select_max_region(seg)

			if compute_iou(seg, gt) < iou_target:
				break
		seg = select_max_region(seg.astype(np.uint8))
		return seg.astype(np.uint8)


	def q_x(x_0,t=65):
		'''Add Gaussian noise to an image, diffusion-forward-process style.

		The image is mapped from [0, 255] to [-1, 1], mixed with standard normal
		noise using the cumulative schedule at step ``t``, and mapped back.

		:param x_0: numpy image array with values in 0..255.
		:param t: index into the 100-step noise schedule; larger values
			weight the noise more.
		:return: numpy array of the same shape holding the noised image.
		'''
		x_0 = torch.from_numpy(x_0).float() / 127.5 - 1
		num_steps = 100

		# Sigmoid-shaped beta schedule between 1e-5 and 0.5e-2.
		betas = torch.linspace(-6,6,num_steps)
		betas = torch.sigmoid(betas)*(0.5e-2 - 1e-5)+1e-5

		# Cumulative product of (1 - beta): the signal fraction kept at each step.
		alphas_prod = torch.cumprod(1 - betas,0)

		noise = torch.randn_like(x_0)
		signal_scale = torch.sqrt(alphas_prod)[t]
		noise_scale = torch.sqrt(1 - alphas_prod)[t]
		return (signal_scale * x_0 + noise_scale * noise).numpy() * 127.5 + 127.5


	def extract_target_boundary(img, target_mask):
		"""Return a Sobel edge-strength map of ``img`` restricted to ``target_mask``.

		The x and y Sobel responses (kernel size 3) are converted to absolute
		8-bit values, averaged, reduced with a max over the last axis, scaled
		by 1/255, and multiplied by the mask cast to float32.
		"""
		grad_x = cv2.convertScaleAbs(cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3))
		grad_y = cv2.convertScaleAbs(cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3))
		edges = cv2.addWeighted(grad_x, 0.5, grad_y, 0.5, 0)
		edges = np.max(edges,-1).astype(np.float32)/255
		return edges * target_mask.astype(np.float32)