Spaces:

Kedreamix
/

YoloGesture

Running

App Files Files Community

YoloGesture / utils /dataloader.py

Kedreamix

YoloGesture推理主要代码

4a3ab35 almost 2 years ago

raw

history blame

14.5 kB

	from random import sample, shuffle

	import cv2
	import numpy as np
	import torch
	from PIL import Image
	from torch.utils.data.dataset import Dataset

	from utils.utils import cvtColor, preprocess_input


	class YoloDataset(Dataset):
	def __init__(self, annotation_lines, input_shape, num_classes, epoch_length, mosaic, train, mosaic_ratio = 0.7):
	super(YoloDataset, self).__init__()
	self.annotation_lines = annotation_lines
	self.input_shape = input_shape
	self.num_classes = num_classes
	self.epoch_length = epoch_length
	self.mosaic = mosaic
	self.train = train
	self.mosaic_ratio = mosaic_ratio

	self.epoch_now = -1
	self.length = len(self.annotation_lines)

	def __len__(self):
	return self.length

	def __getitem__(self, index):
	index = index % self.length

	#---------------------------------------------------#
	# 训练时进行数据的随机增强
	# 验证时不进行数据的随机增强
	#---------------------------------------------------#
	if self.mosaic:
	if self.rand() < 0.5 and self.epoch_now < self.epoch_length * self.mosaic_ratio:
	lines = sample(self.annotation_lines, 3)
	lines.append(self.annotation_lines[index])
	shuffle(lines)
	image, box = self.get_random_data_with_Mosaic(lines, self.input_shape)
	else:
	image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)
	else:
	image, box = self.get_random_data(self.annotation_lines[index], self.input_shape, random = self.train)
	image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
	box = np.array(box, dtype=np.float32)
	if len(box) != 0:
	box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
	box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]

	box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
	box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
	return image, box

	def rand(self, a=0, b=1):
	return np.random.rand()*(b-a) + a

	def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
	line = annotation_line.split()
	#------------------------------#
	# 读取图像并转换成RGB图像
	#------------------------------#
	image = Image.open(line[0])
	image = cvtColor(image)
	#------------------------------#
	# 获得图像的高宽与目标高宽
	#------------------------------#
	iw, ih = image.size
	h, w = input_shape
	#------------------------------#
	# 获得预测框
	#------------------------------#
	box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])

	if not random:
	scale = min(w/iw, h/ih)
	nw = int(iw*scale)
	nh = int(ih*scale)
	dx = (w-nw)//2
	dy = (h-nh)//2

	#---------------------------------#
	# 将图像多余的部分加上灰条
	#---------------------------------#
	image = image.resize((nw,nh), Image.BICUBIC)
	new_image = Image.new('RGB', (w,h), (128,128,128))
	new_image.paste(image, (dx, dy))
	image_data = np.array(new_image, np.float32)

	#---------------------------------#
	# 对真实框进行调整
	#---------------------------------#
	if len(box)>0:
	np.random.shuffle(box)
	box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
	box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
	box[:, 0:2][box[:, 0:2]<0] = 0
	box[:, 2][box[:, 2]>w] = w
	box[:, 3][box[:, 3]>h] = h
	box_w = box[:, 2] - box[:, 0]
	box_h = box[:, 3] - box[:, 1]
	box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box

	return image_data, box

	#------------------------------------------#
	# 对图像进行缩放并且进行长和宽的扭曲
	#------------------------------------------#
	new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
	scale = self.rand(.25, 2)
	if new_ar < 1:
	nh = int(scale*h)
	nw = int(nh*new_ar)
	else:
	nw = int(scale*w)
	nh = int(nw/new_ar)
	image = image.resize((nw,nh), Image.BICUBIC)

	#------------------------------------------#
	# 将图像多余的部分加上灰条
	#------------------------------------------#
	dx = int(self.rand(0, w-nw))
	dy = int(self.rand(0, h-nh))
	new_image = Image.new('RGB', (w,h), (128,128,128))
	new_image.paste(image, (dx, dy))
	image = new_image

	#------------------------------------------#
	# 翻转图像
	#------------------------------------------#
	flip = self.rand()<.5
	if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)

	image_data = np.array(image, np.uint8)
	#---------------------------------#
	# 对图像进行色域变换
	# 计算色域变换的参数
	#---------------------------------#
	r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
	#---------------------------------#
	# 将图像转到HSV上
	#---------------------------------#
	hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
	dtype = image_data.dtype
	#---------------------------------#
	# 应用变换
	#---------------------------------#
	x = np.arange(0, 256, dtype=r.dtype)
	lut_hue = ((x * r[0]) % 180).astype(dtype)
	lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
	lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

	image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
	image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)

	#---------------------------------#
	# 对真实框进行调整
	#---------------------------------#
	if len(box)>0:
	np.random.shuffle(box)
	box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
	box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
	if flip: box[:, [0,2]] = w - box[:, [2,0]]
	box[:, 0:2][box[:, 0:2]<0] = 0
	box[:, 2][box[:, 2]>w] = w
	box[:, 3][box[:, 3]>h] = h
	box_w = box[:, 2] - box[:, 0]
	box_h = box[:, 3] - box[:, 1]
	box = box[np.logical_and(box_w>1, box_h>1)]

	return image_data, box

	def merge_bboxes(self, bboxes, cutx, cuty):
	merge_bbox = []
	for i in range(len(bboxes)):
	for box in bboxes[i]:
	tmp_box = []
	x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

	if i == 0:
	if y1 > cuty or x1 > cutx:
	continue
	if y2 >= cuty and y1 <= cuty:
	y2 = cuty
	if x2 >= cutx and x1 <= cutx:
	x2 = cutx

	if i == 1:
	if y2 < cuty or x1 > cutx:
	continue
	if y2 >= cuty and y1 <= cuty:
	y1 = cuty
	if x2 >= cutx and x1 <= cutx:
	x2 = cutx

	if i == 2:
	if y2 < cuty or x2 < cutx:
	continue
	if y2 >= cuty and y1 <= cuty:
	y1 = cuty
	if x2 >= cutx and x1 <= cutx:
	x1 = cutx

	if i == 3:
	if y1 > cuty or x2 < cutx:
	continue
	if y2 >= cuty and y1 <= cuty:
	y2 = cuty
	if x2 >= cutx and x1 <= cutx:
	x1 = cutx
	tmp_box.append(x1)
	tmp_box.append(y1)
	tmp_box.append(x2)
	tmp_box.append(y2)
	tmp_box.append(box[-1])
	merge_bbox.append(tmp_box)
	return merge_bbox

	def get_random_data_with_Mosaic(self, annotation_line, input_shape, jitter=0.3, hue=.1, sat=0.7, val=0.4):
	h, w = input_shape
	min_offset_x = self.rand(0.3, 0.7)
	min_offset_y = self.rand(0.3, 0.7)

	image_datas = []
	box_datas = []
	index = 0
	for line in annotation_line:
	#---------------------------------#
	# 每一行进行分割
	#---------------------------------#
	line_content = line.split()
	#---------------------------------#
	# 打开图片
	#---------------------------------#
	image = Image.open(line_content[0])
	image = cvtColor(image)

	#---------------------------------#
	# 图片的大小
	#---------------------------------#
	iw, ih = image.size
	#---------------------------------#
	# 保存框的位置
	#---------------------------------#
	box = np.array([np.array(list(map(int,box.split(',')))) for box in line_content[1:]])

	#---------------------------------#
	# 是否翻转图片
	#---------------------------------#
	flip = self.rand()<.5
	if flip and len(box)>0:
	image = image.transpose(Image.FLIP_LEFT_RIGHT)
	box[:, [0,2]] = iw - box[:, [2,0]]

	#------------------------------------------#
	# 对图像进行缩放并且进行长和宽的扭曲
	#------------------------------------------#
	new_ar = iw/ih * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
	scale = self.rand(.4, 1)
	if new_ar < 1:
	nh = int(scale*h)
	nw = int(nh*new_ar)
	else:
	nw = int(scale*w)
	nh = int(nw/new_ar)
	image = image.resize((nw, nh), Image.BICUBIC)

	#-----------------------------------------------#
	# 将图片进行放置，分别对应四张分割图片的位置
	#-----------------------------------------------#
	if index == 0:
	dx = int(w*min_offset_x) - nw
	dy = int(h*min_offset_y) - nh
	elif index == 1:
	dx = int(w*min_offset_x) - nw
	dy = int(h*min_offset_y)
	elif index == 2:
	dx = int(w*min_offset_x)
	dy = int(h*min_offset_y)
	elif index == 3:
	dx = int(w*min_offset_x)
	dy = int(h*min_offset_y) - nh

	new_image = Image.new('RGB', (w,h), (128,128,128))
	new_image.paste(image, (dx, dy))
	image_data = np.array(new_image)

	index = index + 1
	box_data = []
	#---------------------------------#
	# 对box进行重新处理
	#---------------------------------#
	if len(box)>0:
	np.random.shuffle(box)
	box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
	box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
	box[:, 0:2][box[:, 0:2]<0] = 0
	box[:, 2][box[:, 2]>w] = w
	box[:, 3][box[:, 3]>h] = h
	box_w = box[:, 2] - box[:, 0]
	box_h = box[:, 3] - box[:, 1]
	box = box[np.logical_and(box_w>1, box_h>1)]
	box_data = np.zeros((len(box),5))
	box_data[:len(box)] = box

	image_datas.append(image_data)
	box_datas.append(box_data)

	#---------------------------------#
	# 将图片分割，放在一起
	#---------------------------------#
	cutx = int(w * min_offset_x)
	cuty = int(h * min_offset_y)

	new_image = np.zeros([h, w, 3])
	new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
	new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
	new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
	new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

	new_image = np.array(new_image, np.uint8)
	#---------------------------------#
	# 对图像进行色域变换
	# 计算色域变换的参数
	#---------------------------------#
	r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
	#---------------------------------#
	# 将图像转到HSV上
	#---------------------------------#
	hue, sat, val = cv2.split(cv2.cvtColor(new_image, cv2.COLOR_RGB2HSV))
	dtype = new_image.dtype
	#---------------------------------#
	# 应用变换
	#---------------------------------#
	x = np.arange(0, 256, dtype=r.dtype)
	lut_hue = ((x * r[0]) % 180).astype(dtype)
	lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
	lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

	new_image = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
	new_image = cv2.cvtColor(new_image, cv2.COLOR_HSV2RGB)

	#---------------------------------#
	# 对框进行进一步的处理
	#---------------------------------#
	new_boxes = self.merge_bboxes(box_datas, cutx, cuty)

	return new_image, new_boxes

	# DataLoader中collate_fn使用
	def yolo_dataset_collate(batch):
	images = []
	bboxes = []
	for img, box in batch:
	images.append(img)
	bboxes.append(box)
	images = torch.from_numpy(np.array(images)).type(torch.FloatTensor)
	bboxes = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in bboxes]
	return images, bboxes