KB-VQA-E

Sleeping

App Files Files Community

KB-VQA-E / models /yolov5 /utils /segment /dataloaders.py

m7mdal7aj

uploading a fresh copy of the detection models

92894b3 verified 9 months ago

raw

history blame

13.1 kB

	# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
	"""Dataloaders."""

	import os
	import random

	import cv2
	import numpy as np
	import torch
	from torch.utils.data import DataLoader, distributed

	from ..augmentations import augment_hsv, copy_paste, letterbox
	from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, SmartDistributedSampler, seed_worker
	from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn
	from ..torch_utils import torch_distributed_zero_first
	from .augmentations import mixup, random_perspective

	RANK = int(os.getenv("RANK", -1))


	def create_dataloader(
	path,
	imgsz,
	batch_size,
	stride,
	single_cls=False,
	hyp=None,
	augment=False,
	cache=False,
	pad=0.0,
	rect=False,
	rank=-1,
	workers=8,
	image_weights=False,
	quad=False,
	prefix="",
	shuffle=False,
	mask_downsample_ratio=1,
	overlap_mask=False,
	seed=0,
	):
	if rect and shuffle:
	LOGGER.warning("WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False")
	shuffle = False
	with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
	dataset = LoadImagesAndLabelsAndMasks(
	path,
	imgsz,
	batch_size,
	augment=augment, # augmentation
	hyp=hyp, # hyperparameters
	rect=rect, # rectangular batches
	cache_images=cache,
	single_cls=single_cls,
	stride=int(stride),
	pad=pad,
	image_weights=image_weights,
	prefix=prefix,
	downsample_ratio=mask_downsample_ratio,
	overlap=overlap_mask,
	rank=rank,
	)

	batch_size = min(batch_size, len(dataset))
	nd = torch.cuda.device_count() # number of CUDA devices
	nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers
	sampler = None if rank == -1 else SmartDistributedSampler(dataset, shuffle=shuffle)
	loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates
	generator = torch.Generator()
	generator.manual_seed(6148914691236517205 + seed + RANK)
	return loader(
	dataset,
	batch_size=batch_size,
	shuffle=shuffle and sampler is None,
	num_workers=nw,
	sampler=sampler,
	pin_memory=True,
	collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn,
	worker_init_fn=seed_worker,
	generator=generator,
	), dataset


	class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing
	def __init__(
	self,
	path,
	img_size=640,
	batch_size=16,
	augment=False,
	hyp=None,
	rect=False,
	image_weights=False,
	cache_images=False,
	single_cls=False,
	stride=32,
	pad=0,
	min_items=0,
	prefix="",
	downsample_ratio=1,
	overlap=False,
	rank=-1,
	seed=0,
	):
	super().__init__(
	path,
	img_size,
	batch_size,
	augment,
	hyp,
	rect,
	image_weights,
	cache_images,
	single_cls,
	stride,
	pad,
	min_items,
	prefix,
	rank,
	seed,
	)
	self.downsample_ratio = downsample_ratio
	self.overlap = overlap

	def __getitem__(self, index):
	index = self.indices[index] # linear, shuffled, or image_weights

	hyp = self.hyp
	mosaic = self.mosaic and random.random() < hyp["mosaic"]
	masks = []
	if mosaic:
	# Load mosaic
	img, labels, segments = self.load_mosaic(index)
	shapes = None

	# MixUp augmentation
	if random.random() < hyp["mixup"]:
	img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1)))

	else:
	# Load image
	img, (h0, w0), (h, w) = self.load_image(index)

	# Letterbox
	shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
	img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
	shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling

	labels = self.labels[index].copy()
	# [array, array, ....], array.shape=(num_points, 2), xyxyxyxy
	segments = self.segments[index].copy()
	if len(segments):
	for i_s in range(len(segments)):
	segments[i_s] = xyn2xy(
	segments[i_s],
	ratio[0] * w,
	ratio[1] * h,
	padw=pad[0],
	padh=pad[1],
	)
	if labels.size: # normalized xywh to pixel xyxy format
	labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

	if self.augment:
	img, labels, segments = random_perspective(
	img,
	labels,
	segments=segments,
	degrees=hyp["degrees"],
	translate=hyp["translate"],
	scale=hyp["scale"],
	shear=hyp["shear"],
	perspective=hyp["perspective"],
	)

	nl = len(labels) # number of labels
	if nl:
	labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3)
	if self.overlap:
	masks, sorted_idx = polygons2masks_overlap(
	img.shape[:2], segments, downsample_ratio=self.downsample_ratio
	)
	masks = masks[None] # (640, 640) -> (1, 640, 640)
	labels = labels[sorted_idx]
	else:
	masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio)

	masks = (
	torch.from_numpy(masks)
	if len(masks)
	else torch.zeros(
	1 if self.overlap else nl, img.shape[0] // self.downsample_ratio, img.shape[1] // self.downsample_ratio
	)
	)
	# TODO: albumentations support
	if self.augment:
	# Albumentations
	# there are some augmentation that won't change boxes and masks,
	# so just be it for now.
	img, labels = self.albumentations(img, labels)
	nl = len(labels) # update after albumentations

	# HSV color-space
	augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"])

	# Flip up-down
	if random.random() < hyp["flipud"]:
	img = np.flipud(img)
	if nl:
	labels[:, 2] = 1 - labels[:, 2]
	masks = torch.flip(masks, dims=[1])

	# Flip left-right
	if random.random() < hyp["fliplr"]:
	img = np.fliplr(img)
	if nl:
	labels[:, 1] = 1 - labels[:, 1]
	masks = torch.flip(masks, dims=[2])

	# Cutouts # labels = cutout(img, labels, p=0.5)

	labels_out = torch.zeros((nl, 6))
	if nl:
	labels_out[:, 1:] = torch.from_numpy(labels)

	# Convert
	img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
	img = np.ascontiguousarray(img)

	return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks)

	def load_mosaic(self, index):
	# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
	labels4, segments4 = [], []
	s = self.img_size
	yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y

	# 3 additional image indices
	indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices
	for i, index in enumerate(indices):
	# Load image
	img, _, (h, w) = self.load_image(index)

	# place img in img4
	if i == 0: # top left
	img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
	x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
	x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
	elif i == 1: # top right
	x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
	x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
	elif i == 2: # bottom left
	x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
	x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
	elif i == 3: # bottom right
	x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
	x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

	img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
	padw = x1a - x1b
	padh = y1a - y1b

	labels, segments = self.labels[index].copy(), self.segments[index].copy()

	if labels.size:
	labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
	segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
	labels4.append(labels)
	segments4.extend(segments)

	# Concat/clip labels
	labels4 = np.concatenate(labels4, 0)
	for x in (labels4[:, 1:], *segments4):
	np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
	# img4, labels4 = replicate(img4, labels4) # replicate

	# Augment
	img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"])
	img4, labels4, segments4 = random_perspective(
	img4,
	labels4,
	segments4,
	degrees=self.hyp["degrees"],
	translate=self.hyp["translate"],
	scale=self.hyp["scale"],
	shear=self.hyp["shear"],
	perspective=self.hyp["perspective"],
	border=self.mosaic_border,
	) # border to remove
	return img4, labels4, segments4

	@staticmethod
	def collate_fn(batch):
	img, label, path, shapes, masks = zip(*batch) # transposed
	batched_masks = torch.cat(masks, 0)
	for i, l in enumerate(label):
	l[:, 0] = i # add target image index for build_targets()
	return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks


	def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
	"""
	Args:
	img_size (tuple): The image size.
	polygons (np.ndarray): [N, M], N is the number of polygons,
	M is the number of points(Be divided by 2).
	"""
	mask = np.zeros(img_size, dtype=np.uint8)
	polygons = np.asarray(polygons)
	polygons = polygons.astype(np.int32)
	shape = polygons.shape
	polygons = polygons.reshape(shape[0], -1, 2)
	cv2.fillPoly(mask, polygons, color=color)
	nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
	# NOTE: fillPoly firstly then resize is trying the keep the same way
	# of loss calculation when mask-ratio=1.
	mask = cv2.resize(mask, (nw, nh))
	return mask


	def polygons2masks(img_size, polygons, color, downsample_ratio=1):
	"""
	Args:
	img_size (tuple): The image size.
	polygons (list[np.ndarray]): each polygon is [N, M],
	N is the number of polygons,
	M is the number of points(Be divided by 2).
	"""
	masks = []
	for si in range(len(polygons)):
	mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio)
	masks.append(mask)
	return np.array(masks)


	def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
	"""Return a (640, 640) overlap mask."""
	masks = np.zeros(
	(img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
	dtype=np.int32 if len(segments) > 255 else np.uint8,
	)
	areas = []
	ms = []
	for si in range(len(segments)):
	mask = polygon2mask(
	img_size,
	[segments[si].reshape(-1)],
	downsample_ratio=downsample_ratio,
	color=1,
	)
	ms.append(mask)
	areas.append(mask.sum())
	areas = np.asarray(areas)
	index = np.argsort(-areas)
	ms = np.array(ms)[index]
	for i in range(len(segments)):
	mask = ms[i] * (i + 1)
	masks = masks + mask
	masks = np.clip(masks, a_min=0, a_max=i + 1)
	return masks, index