Spaces:
Running
Running
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license | |
"""Dataloaders.""" | |
import os | |
import random | |
import cv2 | |
import numpy as np | |
import torch | |
from torch.utils.data import DataLoader, distributed | |
from ..augmentations import augment_hsv, copy_paste, letterbox | |
from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, SmartDistributedSampler, seed_worker | |
from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn | |
from ..torch_utils import torch_distributed_zero_first | |
from .augmentations import mixup, random_perspective | |
RANK = int(os.getenv("RANK", -1)) | |
def create_dataloader( | |
path, | |
imgsz, | |
batch_size, | |
stride, | |
single_cls=False, | |
hyp=None, | |
augment=False, | |
cache=False, | |
pad=0.0, | |
rect=False, | |
rank=-1, | |
workers=8, | |
image_weights=False, | |
quad=False, | |
prefix="", | |
shuffle=False, | |
mask_downsample_ratio=1, | |
overlap_mask=False, | |
seed=0, | |
): | |
if rect and shuffle: | |
LOGGER.warning("WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False") | |
shuffle = False | |
with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP | |
dataset = LoadImagesAndLabelsAndMasks( | |
path, | |
imgsz, | |
batch_size, | |
augment=augment, # augmentation | |
hyp=hyp, # hyperparameters | |
rect=rect, # rectangular batches | |
cache_images=cache, | |
single_cls=single_cls, | |
stride=int(stride), | |
pad=pad, | |
image_weights=image_weights, | |
prefix=prefix, | |
downsample_ratio=mask_downsample_ratio, | |
overlap=overlap_mask, | |
rank=rank, | |
) | |
batch_size = min(batch_size, len(dataset)) | |
nd = torch.cuda.device_count() # number of CUDA devices | |
nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers | |
sampler = None if rank == -1 else SmartDistributedSampler(dataset, shuffle=shuffle) | |
loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates | |
generator = torch.Generator() | |
generator.manual_seed(6148914691236517205 + seed + RANK) | |
return loader( | |
dataset, | |
batch_size=batch_size, | |
shuffle=shuffle and sampler is None, | |
num_workers=nw, | |
sampler=sampler, | |
pin_memory=True, | |
collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, | |
worker_init_fn=seed_worker, | |
generator=generator, | |
), dataset | |
class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing | |
def __init__( | |
self, | |
path, | |
img_size=640, | |
batch_size=16, | |
augment=False, | |
hyp=None, | |
rect=False, | |
image_weights=False, | |
cache_images=False, | |
single_cls=False, | |
stride=32, | |
pad=0, | |
min_items=0, | |
prefix="", | |
downsample_ratio=1, | |
overlap=False, | |
rank=-1, | |
seed=0, | |
): | |
super().__init__( | |
path, | |
img_size, | |
batch_size, | |
augment, | |
hyp, | |
rect, | |
image_weights, | |
cache_images, | |
single_cls, | |
stride, | |
pad, | |
min_items, | |
prefix, | |
rank, | |
seed, | |
) | |
self.downsample_ratio = downsample_ratio | |
self.overlap = overlap | |
def __getitem__(self, index): | |
index = self.indices[index] # linear, shuffled, or image_weights | |
hyp = self.hyp | |
mosaic = self.mosaic and random.random() < hyp["mosaic"] | |
masks = [] | |
if mosaic: | |
# Load mosaic | |
img, labels, segments = self.load_mosaic(index) | |
shapes = None | |
# MixUp augmentation | |
if random.random() < hyp["mixup"]: | |
img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1))) | |
else: | |
# Load image | |
img, (h0, w0), (h, w) = self.load_image(index) | |
# Letterbox | |
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape | |
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) | |
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling | |
labels = self.labels[index].copy() | |
# [array, array, ....], array.shape=(num_points, 2), xyxyxyxy | |
segments = self.segments[index].copy() | |
if len(segments): | |
for i_s in range(len(segments)): | |
segments[i_s] = xyn2xy( | |
segments[i_s], | |
ratio[0] * w, | |
ratio[1] * h, | |
padw=pad[0], | |
padh=pad[1], | |
) | |
if labels.size: # normalized xywh to pixel xyxy format | |
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) | |
if self.augment: | |
img, labels, segments = random_perspective( | |
img, | |
labels, | |
segments=segments, | |
degrees=hyp["degrees"], | |
translate=hyp["translate"], | |
scale=hyp["scale"], | |
shear=hyp["shear"], | |
perspective=hyp["perspective"], | |
) | |
nl = len(labels) # number of labels | |
if nl: | |
labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) | |
if self.overlap: | |
masks, sorted_idx = polygons2masks_overlap( | |
img.shape[:2], segments, downsample_ratio=self.downsample_ratio | |
) | |
masks = masks[None] # (640, 640) -> (1, 640, 640) | |
labels = labels[sorted_idx] | |
else: | |
masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) | |
masks = ( | |
torch.from_numpy(masks) | |
if len(masks) | |
else torch.zeros( | |
1 if self.overlap else nl, img.shape[0] // self.downsample_ratio, img.shape[1] // self.downsample_ratio | |
) | |
) | |
# TODO: albumentations support | |
if self.augment: | |
# Albumentations | |
# there are some augmentation that won't change boxes and masks, | |
# so just be it for now. | |
img, labels = self.albumentations(img, labels) | |
nl = len(labels) # update after albumentations | |
# HSV color-space | |
augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) | |
# Flip up-down | |
if random.random() < hyp["flipud"]: | |
img = np.flipud(img) | |
if nl: | |
labels[:, 2] = 1 - labels[:, 2] | |
masks = torch.flip(masks, dims=[1]) | |
# Flip left-right | |
if random.random() < hyp["fliplr"]: | |
img = np.fliplr(img) | |
if nl: | |
labels[:, 1] = 1 - labels[:, 1] | |
masks = torch.flip(masks, dims=[2]) | |
# Cutouts # labels = cutout(img, labels, p=0.5) | |
labels_out = torch.zeros((nl, 6)) | |
if nl: | |
labels_out[:, 1:] = torch.from_numpy(labels) | |
# Convert | |
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB | |
img = np.ascontiguousarray(img) | |
return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks) | |
def load_mosaic(self, index): | |
# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic | |
labels4, segments4 = [], [] | |
s = self.img_size | |
yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y | |
# 3 additional image indices | |
indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices | |
for i, index in enumerate(indices): | |
# Load image | |
img, _, (h, w) = self.load_image(index) | |
# place img in img4 | |
if i == 0: # top left | |
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles | |
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) | |
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) | |
elif i == 1: # top right | |
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc | |
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h | |
elif i == 2: # bottom left | |
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) | |
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) | |
elif i == 3: # bottom right | |
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) | |
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) | |
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] | |
padw = x1a - x1b | |
padh = y1a - y1b | |
labels, segments = self.labels[index].copy(), self.segments[index].copy() | |
if labels.size: | |
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format | |
segments = [xyn2xy(x, w, h, padw, padh) for x in segments] | |
labels4.append(labels) | |
segments4.extend(segments) | |
# Concat/clip labels | |
labels4 = np.concatenate(labels4, 0) | |
for x in (labels4[:, 1:], *segments4): | |
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() | |
# img4, labels4 = replicate(img4, labels4) # replicate | |
# Augment | |
img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) | |
img4, labels4, segments4 = random_perspective( | |
img4, | |
labels4, | |
segments4, | |
degrees=self.hyp["degrees"], | |
translate=self.hyp["translate"], | |
scale=self.hyp["scale"], | |
shear=self.hyp["shear"], | |
perspective=self.hyp["perspective"], | |
border=self.mosaic_border, | |
) # border to remove | |
return img4, labels4, segments4 | |
def collate_fn(batch): | |
img, label, path, shapes, masks = zip(*batch) # transposed | |
batched_masks = torch.cat(masks, 0) | |
for i, l in enumerate(label): | |
l[:, 0] = i # add target image index for build_targets() | |
return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks | |
def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): | |
""" | |
Args: | |
img_size (tuple): The image size. | |
polygons (np.ndarray): [N, M], N is the number of polygons, | |
M is the number of points(Be divided by 2). | |
""" | |
mask = np.zeros(img_size, dtype=np.uint8) | |
polygons = np.asarray(polygons) | |
polygons = polygons.astype(np.int32) | |
shape = polygons.shape | |
polygons = polygons.reshape(shape[0], -1, 2) | |
cv2.fillPoly(mask, polygons, color=color) | |
nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) | |
# NOTE: fillPoly firstly then resize is trying the keep the same way | |
# of loss calculation when mask-ratio=1. | |
mask = cv2.resize(mask, (nw, nh)) | |
return mask | |
def polygons2masks(img_size, polygons, color, downsample_ratio=1): | |
""" | |
Args: | |
img_size (tuple): The image size. | |
polygons (list[np.ndarray]): each polygon is [N, M], | |
N is the number of polygons, | |
M is the number of points(Be divided by 2). | |
""" | |
masks = [] | |
for si in range(len(polygons)): | |
mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio) | |
masks.append(mask) | |
return np.array(masks) | |
def polygons2masks_overlap(img_size, segments, downsample_ratio=1): | |
"""Return a (640, 640) overlap mask.""" | |
masks = np.zeros( | |
(img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), | |
dtype=np.int32 if len(segments) > 255 else np.uint8, | |
) | |
areas = [] | |
ms = [] | |
for si in range(len(segments)): | |
mask = polygon2mask( | |
img_size, | |
[segments[si].reshape(-1)], | |
downsample_ratio=downsample_ratio, | |
color=1, | |
) | |
ms.append(mask) | |
areas.append(mask.sum()) | |
areas = np.asarray(areas) | |
index = np.argsort(-areas) | |
ms = np.array(ms)[index] | |
for i in range(len(segments)): | |
mask = ms[i] * (i + 1) | |
masks = masks + mask | |
masks = np.clip(masks, a_min=0, a_max=i + 1) | |
return masks, index | |