diff --git a/README.md b/README.md index 2f5a6e719f5182514782048c672412eb64c4edb9..53c9a3f54b47bdf3ef4822dc95682ff2c7984fc5 100644 --- a/README.md +++ b/README.md @@ -11,3 +11,6 @@ license: mit --- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference + +This app will detect Buffalos in the image. +Try out with different images. \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..b18ed71eb6537b6266cddab5aa87409483514c42 --- /dev/null +++ b/app.py @@ -0,0 +1,76 @@ +import cv2, os +import gradio as gr +import numpy as np +import torch +from models.common import DetectMultiBackend +from utils.augmentations import letterbox +from utils.general import non_max_suppression, scale_boxes +from utils.plots import Annotator, colors, save_one_box +from utils.torch_utils import select_device + +# path = r"C:\Users\H504171\U_Personal\YoloV3\data\buffalo\Test\341.jpg" +img_size = 640 +stride = 32 +auto = True +max_det=1000 +classes = None +agnostic_nms = False +line_thickness = 3 + +# Load model +device = select_device('cpu') +dnn =False +data = "data/custom_data.yaml" +weights = "weights/best.pt" +model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=False) + + + +def inference(image, iou_thres, conf_thres): + im = letterbox(image, img_size, stride=stride, auto=auto)[0] # padded resize + im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + im = np.ascontiguousarray(im) # contiguous + im = torch.from_numpy(im).to(model.device) + im = im.half() if model.fp16 else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + if len(im.shape) == 3: + im = im[None] + pred = model(im, augment=False, visualize=False) + pred = pred[0][1] if isinstance(pred[0], list) else pred[0] + pred = non_max_suppression(pred, conf_thres, iou_thres, None, agnostic_nms, max_det=max_det) + + # Process predictions + for i, det in enumerate(pred): # per image + gn = torch.tensor(image.shape)[[1, 0, 1, 0]] # normalization gain whwh + imc = image.copy() + annotator = Annotator(image, line_width=line_thickness, example=str({0:'buffalo'})) + if len(det): + # Rescale boxes from img_size to im0 size + det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], image.shape).round() + + # Print results + for c in det[:, 5].unique(): + n = (det[:, 5] == c).sum() # detections per class + + # Write results + for *xyxy, conf, cls in reversed(det): + c = int(cls) # integer class + label = 'buffalo' + annotator.box_label(xyxy, label, color=colors(c, True)) + # Stream results + im0 = annotator.result() + return im0 + + +title = "YOLO V9 trained on Custom Dataset" +description = "Gradio interface to show yoloV9 object detection." +examples = [[f'examples/{i}'] for i in os.listdir("examples")] +demo = gr.Interface( + inference, + inputs = [gr.Image(height=640, width = 640, label="Input Image"), gr.Slider(0, 1, value = 0.5, label="IOU Value"), gr.Slider(0, 1, value = 0.5, label="Threshold Value")], + outputs = [gr.Image(label="YoloV9 Output", height=640, width = 640)], + title = title, + description = description, + examples = examples, +) +demo.launch() \ No newline at end of file diff --git a/data/custom_data.yaml b/data/custom_data.yaml new file mode 100644 index 0000000000000000000000000000000000000000..836e900eb1b7282aa3b7e94695586a43c6395ae1 --- /dev/null +++ b/data/custom_data.yaml @@ -0,0 +1,6 @@ +train: data/buffalo/train/images +val: data/buffalo/val/images +test: data/buffalo/test/images + +nc: 1 +names: ["buffalo"] \ No newline at end of file diff --git a/examples/341.jpg b/examples/341.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3448b78f95908552efdcc34a0d6ef09279f655df Binary files /dev/null and b/examples/341.jpg differ diff --git a/examples/342.jpg b/examples/342.jpg new file mode 100644 index 0000000000000000000000000000000000000000..179fa4d43f90e635115021bafca9d991d79fb4d3 Binary files /dev/null and b/examples/342.jpg differ diff --git a/examples/343.jpg b/examples/343.jpg new file mode 100644 index 0000000000000000000000000000000000000000..272150a7a8bcc2d0b9684ceb41245cd2fc9186fd Binary files /dev/null and b/examples/343.jpg differ diff --git a/examples/344.jpg b/examples/344.jpg new file mode 100644 index 0000000000000000000000000000000000000000..108923ebc72deb4b5bc5d55ce0d6b8f0fef3accf Binary files /dev/null and b/examples/344.jpg differ diff --git a/examples/345.jpg b/examples/345.jpg new file mode 100644 index 0000000000000000000000000000000000000000..45e68c0226b5e4b00da33c6966a884f150da6606 Binary files /dev/null and b/examples/345.jpg differ diff --git a/examples/346.jpg b/examples/346.jpg new file mode 100644 index 0000000000000000000000000000000000000000..817f668d50d513583837d02f1f18d40602695688 Binary files /dev/null and b/examples/346.jpg differ diff --git a/examples/347.jpg b/examples/347.jpg new file mode 100644 index 0000000000000000000000000000000000000000..79f9bdbf2b1a7db88193911d37444f2a0ffd68e1 Binary files /dev/null and b/examples/347.jpg differ diff --git a/examples/348.jpg b/examples/348.jpg new file mode 100644 index 0000000000000000000000000000000000000000..39f7c8558791a007e20e42d8690ddb9db1d7fb8f Binary files /dev/null and b/examples/348.jpg differ diff --git a/examples/349.jpg b/examples/349.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7e2661f15b08572cb2cccec19a6de0b6c118d644 Binary files /dev/null and b/examples/349.jpg differ diff --git a/examples/350.jpg b/examples/350.jpg new file mode 100644 index 0000000000000000000000000000000000000000..70296140b4a51a1807d52bfe01e7573e4368036b Binary files /dev/null and b/examples/350.jpg differ diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..84952a8167bc2975913a6def6b4f027d566552a9 --- /dev/null +++ b/models/__init__.py @@ -0,0 +1 @@ +# init \ No newline at end of file diff --git a/models/common.py b/models/common.py new file mode 100644 index 0000000000000000000000000000000000000000..c6de2cd66e5b1182494a8f17102303804b3fc34f --- /dev/null +++ b/models/common.py @@ -0,0 +1,1200 @@ +import ast +import contextlib +import json +import math +import platform +import warnings +import zipfile +from collections import OrderedDict, namedtuple +from copy import copy +from pathlib import Path +from urllib.parse import urlparse + +from typing import Optional + +import cv2 +import numpy as np +import pandas as pd +import requests +import torch +import torch.nn as nn +from IPython.display import display +from PIL import Image +from torch.cuda import amp + +from utils import TryExcept +from utils.dataloaders import exif_transpose, letterbox +from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr, + increment_path, is_notebook, make_divisible, non_max_suppression, scale_boxes, + xywh2xyxy, xyxy2xywh, yaml_load) +from utils.plots import Annotator, colors, save_one_box +from utils.torch_utils import copy_attr, smart_inference_mode + + +def autopad(k, p=None, d=1): # kernel, padding, dilation + # Pad to 'same' shape outputs + if d > 1: + k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size + if p is None: + p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad + return p + + +class Conv(nn.Module): + # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation) + default_act = nn.SiLU() # default activation + + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True): + super().__init__() + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False) + self.bn = nn.BatchNorm2d(c2) + self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() + + def forward(self, x): + return self.act(self.bn(self.conv(x))) + + def forward_fuse(self, x): + return self.act(self.conv(x)) + + +class AConv(nn.Module): + def __init__(self, c1, c2): # ch_in, ch_out, shortcut, kernels, groups, expand + super().__init__() + self.cv1 = Conv(c1, c2, 3, 2, 1) + + def forward(self, x): + x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True) + return self.cv1(x) + + +class ADown(nn.Module): + def __init__(self, c1, c2): # ch_in, ch_out, shortcut, kernels, groups, expand + super().__init__() + self.c = c2 // 2 + self.cv1 = Conv(c1 // 2, self.c, 3, 2, 1) + self.cv2 = Conv(c1 // 2, self.c, 1, 1, 0) + + def forward(self, x): + x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True) + x1,x2 = x.chunk(2, 1) + x1 = self.cv1(x1) + x2 = torch.nn.functional.max_pool2d(x2, 3, 2, 1) + x2 = self.cv2(x2) + return torch.cat((x1, x2), 1) + + +class RepConvN(nn.Module): + """RepConv is a basic rep-style block, including training and deploy status + This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py + """ + default_act = nn.SiLU() # default activation + + def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False): + super().__init__() + assert k == 3 and p == 1 + self.g = g + self.c1 = c1 + self.c2 = c2 + self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() + + self.bn = None + self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False) + self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False) + + def forward_fuse(self, x): + """Forward process""" + return self.act(self.conv(x)) + + def forward(self, x): + """Forward process""" + id_out = 0 if self.bn is None else self.bn(x) + return self.act(self.conv1(x) + self.conv2(x) + id_out) + + def get_equivalent_kernel_bias(self): + kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1) + kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2) + kernelid, biasid = self._fuse_bn_tensor(self.bn) + return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid + + def _avg_to_3x3_tensor(self, avgp): + channels = self.c1 + groups = self.g + kernel_size = avgp.kernel_size + input_dim = channels // groups + k = torch.zeros((channels, input_dim, kernel_size, kernel_size)) + k[np.arange(channels), np.tile(np.arange(input_dim), groups), :, :] = 1.0 / kernel_size ** 2 + return k + + def _pad_1x1_to_3x3_tensor(self, kernel1x1): + if kernel1x1 is None: + return 0 + else: + return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1]) + + def _fuse_bn_tensor(self, branch): + if branch is None: + return 0, 0 + if isinstance(branch, Conv): + kernel = branch.conv.weight + running_mean = branch.bn.running_mean + running_var = branch.bn.running_var + gamma = branch.bn.weight + beta = branch.bn.bias + eps = branch.bn.eps + elif isinstance(branch, nn.BatchNorm2d): + if not hasattr(self, 'id_tensor'): + input_dim = self.c1 // self.g + kernel_value = np.zeros((self.c1, input_dim, 3, 3), dtype=np.float32) + for i in range(self.c1): + kernel_value[i, i % input_dim, 1, 1] = 1 + self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) + kernel = self.id_tensor + running_mean = branch.running_mean + running_var = branch.running_var + gamma = branch.weight + beta = branch.bias + eps = branch.eps + std = (running_var + eps).sqrt() + t = (gamma / std).reshape(-1, 1, 1, 1) + return kernel * t, beta - running_mean * gamma / std + + def fuse_convs(self): + if hasattr(self, 'conv'): + return + kernel, bias = self.get_equivalent_kernel_bias() + self.conv = nn.Conv2d(in_channels=self.conv1.conv.in_channels, + out_channels=self.conv1.conv.out_channels, + kernel_size=self.conv1.conv.kernel_size, + stride=self.conv1.conv.stride, + padding=self.conv1.conv.padding, + dilation=self.conv1.conv.dilation, + groups=self.conv1.conv.groups, + bias=True).requires_grad_(False) + self.conv.weight.data = kernel + self.conv.bias.data = bias + for para in self.parameters(): + para.detach_() + self.__delattr__('conv1') + self.__delattr__('conv2') + if hasattr(self, 'nm'): + self.__delattr__('nm') + if hasattr(self, 'bn'): + self.__delattr__('bn') + if hasattr(self, 'id_tensor'): + self.__delattr__('id_tensor') + + +class SP(nn.Module): + def __init__(self, k=3, s=1): + super(SP, self).__init__() + self.m = nn.MaxPool2d(kernel_size=k, stride=s, padding=k // 2) + + def forward(self, x): + return self.m(x) + + +class MP(nn.Module): + # Max pooling + def __init__(self, k=2): + super(MP, self).__init__() + self.m = nn.MaxPool2d(kernel_size=k, stride=k) + + def forward(self, x): + return self.m(x) + + +class ConvTranspose(nn.Module): + # Convolution transpose 2d layer + default_act = nn.SiLU() # default activation + + def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True): + super().__init__() + self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn) + self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity() + self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity() + + def forward(self, x): + return self.act(self.bn(self.conv_transpose(x))) + + +class DWConv(Conv): + # Depth-wise convolution + def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out, kernel, stride, dilation, activation + super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act) + + +class DWConvTranspose2d(nn.ConvTranspose2d): + # Depth-wise transpose convolution + def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out + super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2)) + + +class DFL(nn.Module): + # DFL module + def __init__(self, c1=17): + super().__init__() + self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False) + self.conv.weight.data[:] = nn.Parameter(torch.arange(c1, dtype=torch.float).view(1, c1, 1, 1)) # / 120.0 + self.c1 = c1 + # self.bn = nn.BatchNorm2d(4) + + def forward(self, x): + b, c, a = x.shape # batch, channels, anchors + return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a) + # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a) + + +class BottleneckBase(nn.Module): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, k=(1, 3), e=0.5): # ch_in, ch_out, shortcut, kernels, groups, expand + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, k[0], 1) + self.cv2 = Conv(c_, c2, k[1], 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class RBottleneckBase(nn.Module): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 1), e=0.5): # ch_in, ch_out, shortcut, kernels, groups, expand + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, k[0], 1) + self.cv2 = Conv(c_, c2, k[1], 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class RepNRBottleneckBase(nn.Module): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 1), e=0.5): # ch_in, ch_out, shortcut, kernels, groups, expand + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = RepConvN(c1, c_, k[0], 1) + self.cv2 = Conv(c_, c2, k[1], 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class Bottleneck(nn.Module): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5): # ch_in, ch_out, shortcut, kernels, groups, expand + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, k[0], 1) + self.cv2 = Conv(c_, c2, k[1], 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class RepNBottleneck(nn.Module): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5): # ch_in, ch_out, shortcut, kernels, groups, expand + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = RepConvN(c1, c_, k[0], 1) + self.cv2 = Conv(c_, c2, k[1], 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class Res(nn.Module): + # ResNet bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super(Res, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c_, 3, 1, g=g) + self.cv3 = Conv(c_, c2, 1, 1) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv3(self.cv2(self.cv1(x))) if self.add else self.cv3(self.cv2(self.cv1(x))) + + +class RepNRes(nn.Module): + # ResNet bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super(RepNRes, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = RepConvN(c_, c_, 3, 1, g=g) + self.cv3 = Conv(c_, c2, 1, 1) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv3(self.cv2(self.cv1(x))) if self.add else self.cv3(self.cv2(self.cv1(x))) + + +class BottleneckCSP(nn.Module): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.SiLU() + self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1)))) + + +class CSP(nn.Module): + # CSP Bottleneck with 3 convolutions + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2) + self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) + + def forward(self, x): + return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1)) + + +class RepNCSP(nn.Module): + # CSP Bottleneck with 3 convolutions + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2) + self.m = nn.Sequential(*(RepNBottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n))) + + def forward(self, x): + return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1)) + + +class CSPBase(nn.Module): + # CSP Bottleneck with 3 convolutions + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2) + self.m = nn.Sequential(*(BottleneckBase(c_, c_, shortcut, g, e=1.0) for _ in range(n))) + + def forward(self, x): + return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1)) + + +class SPP(nn.Module): + # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729 + def __init__(self, c1, c2, k=(5, 9, 13)): + super().__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) + self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + + def forward(self, x): + x = self.cv1(x) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning + return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) + + +class ASPP(torch.nn.Module): + + def __init__(self, in_channels, out_channels): + super().__init__() + kernel_sizes = [1, 3, 3, 1] + dilations = [1, 3, 6, 1] + paddings = [0, 3, 6, 0] + self.aspp = torch.nn.ModuleList() + for aspp_idx in range(len(kernel_sizes)): + conv = torch.nn.Conv2d( + in_channels, + out_channels, + kernel_size=kernel_sizes[aspp_idx], + stride=1, + dilation=dilations[aspp_idx], + padding=paddings[aspp_idx], + bias=True) + self.aspp.append(conv) + self.gap = torch.nn.AdaptiveAvgPool2d(1) + self.aspp_num = len(kernel_sizes) + for m in self.modules(): + if isinstance(m, torch.nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + m.bias.data.fill_(0) + + def forward(self, x): + avg_x = self.gap(x) + out = [] + for aspp_idx in range(self.aspp_num): + inp = avg_x if (aspp_idx == self.aspp_num - 1) else x + out.append(F.relu_(self.aspp[aspp_idx](inp))) + out[-1] = out[-1].expand_as(out[-2]) + out = torch.cat(out, dim=1) + return out + + +class SPPCSPC(nn.Module): + # CSP SPP https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)): + super(SPPCSPC, self).__init__() + c_ = int(2 * c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(c_, c_, 3, 1) + self.cv4 = Conv(c_, c_, 1, 1) + self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) + self.cv5 = Conv(4 * c_, c_, 1, 1) + self.cv6 = Conv(c_, c_, 3, 1) + self.cv7 = Conv(2 * c_, c2, 1, 1) + + def forward(self, x): + x1 = self.cv4(self.cv3(self.cv1(x))) + y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1))) + y2 = self.cv2(x) + return self.cv7(torch.cat((y1, y2), dim=1)) + + +class SPPF(nn.Module): + # Spatial Pyramid Pooling - Fast (SPPF) layer by Glenn Jocher + def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13)) + super().__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * 4, c2, 1, 1) + self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) + # self.m = SoftPool2d(kernel_size=k, stride=1, padding=k // 2) + + def forward(self, x): + x = self.cv1(x) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning + y1 = self.m(x) + y2 = self.m(y1) + return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) + + +import torch.nn.functional as F +from torch.nn.modules.utils import _pair + + +class ReOrg(nn.Module): + # yolo + def __init__(self): + super(ReOrg, self).__init__() + + def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) + return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) + + +class Contract(nn.Module): + # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) + def __init__(self, gain=2): + super().__init__() + self.gain = gain + + def forward(self, x): + b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain' + s = self.gain + x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2) + x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) + return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40) + + +class Expand(nn.Module): + # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160) + def __init__(self, gain=2): + super().__init__() + self.gain = gain + + def forward(self, x): + b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' + s = self.gain + x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80) + x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) + return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160) + + +class Concat(nn.Module): + # Concatenate a list of tensors along dimension + def __init__(self, dimension=1): + super().__init__() + self.d = dimension + + def forward(self, x): + return torch.cat(x, self.d) + + +class Shortcut(nn.Module): + def __init__(self, dimension=0): + super(Shortcut, self).__init__() + self.d = dimension + + def forward(self, x): + return x[0]+x[1] + + +class Silence(nn.Module): + def __init__(self): + super(Silence, self).__init__() + def forward(self, x): + return x + + +##### GELAN ##### + +class SPPELAN(nn.Module): + # spp-elan + def __init__(self, c1, c2, c3): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + self.c = c3 + self.cv1 = Conv(c1, c3, 1, 1) + self.cv2 = SP(5) + self.cv3 = SP(5) + self.cv4 = SP(5) + self.cv5 = Conv(4*c3, c2, 1, 1) + + def forward(self, x): + y = [self.cv1(x)] + y.extend(m(y[-1]) for m in [self.cv2, self.cv3, self.cv4]) + return self.cv5(torch.cat(y, 1)) + + +class RepNCSPELAN4(nn.Module): + # csp-elan + def __init__(self, c1, c2, c3, c4, c5=1): # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + self.c = c3//2 + self.cv1 = Conv(c1, c3, 1, 1) + self.cv2 = nn.Sequential(RepNCSP(c3//2, c4, c5), Conv(c4, c4, 3, 1)) + self.cv3 = nn.Sequential(RepNCSP(c4, c4, c5), Conv(c4, c4, 3, 1)) + self.cv4 = Conv(c3+(2*c4), c2, 1, 1) + + def forward(self, x): + y = list(self.cv1(x).chunk(2, 1)) + y.extend((m(y[-1])) for m in [self.cv2, self.cv3]) + return self.cv4(torch.cat(y, 1)) + + def forward_split(self, x): + y = list(self.cv1(x).split((self.c, self.c), 1)) + y.extend(m(y[-1]) for m in [self.cv2, self.cv3]) + return self.cv4(torch.cat(y, 1)) + +################# + + +##### YOLOR ##### + +class ImplicitA(nn.Module): + def __init__(self, channel): + super(ImplicitA, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) + nn.init.normal_(self.implicit, std=.02) + + def forward(self, x): + return self.implicit + x + + +class ImplicitM(nn.Module): + def __init__(self, channel): + super(ImplicitM, self).__init__() + self.channel = channel + self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) + nn.init.normal_(self.implicit, mean=1., std=.02) + + def forward(self, x): + return self.implicit * x + +################# + + +##### CBNet ##### + +class CBLinear(nn.Module): + def __init__(self, c1, c2s, k=1, s=1, p=None, g=1): # ch_in, ch_outs, kernel, stride, padding, groups + super(CBLinear, self).__init__() + self.c2s = c2s + self.conv = nn.Conv2d(c1, sum(c2s), k, s, autopad(k, p), groups=g, bias=True) + + def forward(self, x): + outs = self.conv(x).split(self.c2s, dim=1) + return outs + +class CBFuse(nn.Module): + def __init__(self, idx): + super(CBFuse, self).__init__() + self.idx = idx + + def forward(self, xs): + target_size = xs[-1].shape[2:] + res = [F.interpolate(x[self.idx[i]], size=target_size, mode='nearest') for i, x in enumerate(xs[:-1])] + out = torch.sum(torch.stack(res + xs[-1:]), dim=0) + return out + +################# + + +class DetectMultiBackend(nn.Module): + # YOLO MultiBackend class for python inference on various backends + def __init__(self, weights='yolo.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True): + # Usage: + # PyTorch: weights = *.pt + # TorchScript: *.torchscript + # ONNX Runtime: *.onnx + # ONNX OpenCV DNN: *.onnx --dnn + # OpenVINO: *_openvino_model + # CoreML: *.mlmodel + # TensorRT: *.engine + # TensorFlow SavedModel: *_saved_model + # TensorFlow GraphDef: *.pb + # TensorFlow Lite: *.tflite + # TensorFlow Edge TPU: *_edgetpu.tflite + # PaddlePaddle: *_paddle_model + from models.experimental import attempt_download, attempt_load # scoped to avoid circular import + + super().__init__() + w = str(weights[0] if isinstance(weights, list) else weights) + pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w) + fp16 &= pt or jit or onnx or engine # FP16 + nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH) + stride = 32 # default stride + cuda = torch.cuda.is_available() and device.type != 'cpu' # use CUDA + if not (pt or triton): + w = attempt_download(w) # download if not local + + if pt: # PyTorch + model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse) + stride = max(int(model.stride.max()), 32) # model stride + names = model.module.names if hasattr(model, 'module') else model.names # get class names + model.half() if fp16 else model.float() + self.model = model # explicitly assign for to(), cpu(), cuda(), half() + elif jit: # TorchScript + LOGGER.info(f'Loading {w} for TorchScript inference...') + extra_files = {'config.txt': ''} # model metadata + model = torch.jit.load(w, _extra_files=extra_files, map_location=device) + model.half() if fp16 else model.float() + if extra_files['config.txt']: # load metadata dict + d = json.loads(extra_files['config.txt'], + object_hook=lambda d: {int(k) if k.isdigit() else k: v + for k, v in d.items()}) + stride, names = int(d['stride']), d['names'] + elif dnn: # ONNX OpenCV DNN + LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') + check_requirements('opencv-python>=4.5.4') + net = cv2.dnn.readNetFromONNX(w) + elif onnx: # ONNX Runtime + LOGGER.info(f'Loading {w} for ONNX Runtime inference...') + check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime')) + import onnxruntime + providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider'] + session = onnxruntime.InferenceSession(w, providers=providers) + output_names = [x.name for x in session.get_outputs()] + meta = session.get_modelmeta().custom_metadata_map # metadata + if 'stride' in meta: + stride, names = int(meta['stride']), eval(meta['names']) + elif xml: # OpenVINO + LOGGER.info(f'Loading {w} for OpenVINO inference...') + check_requirements('openvino') # requires openvino-dev: https://pypi.org/project/openvino-dev/ + from openvino.runtime import Core, Layout, get_batch + ie = Core() + if not Path(w).is_file(): # if not *.xml + w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir + network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin')) + if network.get_parameters()[0].get_layout().empty: + network.get_parameters()[0].set_layout(Layout("NCHW")) + batch_dim = get_batch(network) + if batch_dim.is_static: + batch_size = batch_dim.get_length() + executable_network = ie.compile_model(network, device_name="CPU") # device_name="MYRIAD" for Intel NCS2 + stride, names = self._load_metadata(Path(w).with_suffix('.yaml')) # load metadata + elif engine: # TensorRT + LOGGER.info(f'Loading {w} for TensorRT inference...') + import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download + check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0 + if device.type == 'cpu': + device = torch.device('cuda:0') + Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr')) + logger = trt.Logger(trt.Logger.INFO) + with open(w, 'rb') as f, trt.Runtime(logger) as runtime: + model = runtime.deserialize_cuda_engine(f.read()) + context = model.create_execution_context() + bindings = OrderedDict() + output_names = [] + fp16 = False # default updated below + dynamic = False + for i in range(model.num_bindings): + name = model.get_binding_name(i) + dtype = trt.nptype(model.get_binding_dtype(i)) + if model.binding_is_input(i): + if -1 in tuple(model.get_binding_shape(i)): # dynamic + dynamic = True + context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2])) + if dtype == np.float16: + fp16 = True + else: # output + output_names.append(name) + shape = tuple(context.get_binding_shape(i)) + im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device) + bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr())) + binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) + batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size + elif coreml: # CoreML + LOGGER.info(f'Loading {w} for CoreML inference...') + import coremltools as ct + model = ct.models.MLModel(w) + elif saved_model: # TF SavedModel + LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...') + import tensorflow as tf + keras = False # assume TF1 saved_model + model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w) + elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt + LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...') + import tensorflow as tf + + def wrap_frozen_graph(gd, inputs, outputs): + x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped + ge = x.graph.as_graph_element + return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs)) + + def gd_outputs(gd): + name_list, input_list = [], [] + for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef + name_list.append(node.name) + input_list.extend(node.input) + return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp')) + + gd = tf.Graph().as_graph_def() # TF GraphDef + with open(w, 'rb') as f: + gd.ParseFromString(f.read()) + frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd)) + elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu + from tflite_runtime.interpreter import Interpreter, load_delegate + except ImportError: + import tensorflow as tf + Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate, + if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime + LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...') + delegate = { + 'Linux': 'libedgetpu.so.1', + 'Darwin': 'libedgetpu.1.dylib', + 'Windows': 'edgetpu.dll'}[platform.system()] + interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)]) + else: # TFLite + LOGGER.info(f'Loading {w} for TensorFlow Lite inference...') + interpreter = Interpreter(model_path=w) # load TFLite model + interpreter.allocate_tensors() # allocate + input_details = interpreter.get_input_details() # inputs + output_details = interpreter.get_output_details() # outputs + # load metadata + with contextlib.suppress(zipfile.BadZipFile): + with zipfile.ZipFile(w, "r") as model: + meta_file = model.namelist()[0] + meta = ast.literal_eval(model.read(meta_file).decode("utf-8")) + stride, names = int(meta['stride']), meta['names'] + elif tfjs: # TF.js + raise NotImplementedError('ERROR: YOLO TF.js inference is not supported') + elif paddle: # PaddlePaddle + LOGGER.info(f'Loading {w} for PaddlePaddle inference...') + check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle') + import paddle.inference as pdi + if not Path(w).is_file(): # if not *.pdmodel + w = next(Path(w).rglob('*.pdmodel')) # get *.pdmodel file from *_paddle_model dir + weights = Path(w).with_suffix('.pdiparams') + config = pdi.Config(str(w), str(weights)) + if cuda: + config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0) + predictor = pdi.create_predictor(config) + input_handle = predictor.get_input_handle(predictor.get_input_names()[0]) + output_names = predictor.get_output_names() + elif triton: # NVIDIA Triton Inference Server + LOGGER.info(f'Using {w} as Triton Inference Server...') + check_requirements('tritonclient[all]') + from utils.triton import TritonRemoteModel + model = TritonRemoteModel(url=w) + nhwc = model.runtime.startswith("tensorflow") + else: + raise NotImplementedError(f'ERROR: {w} is not a supported format') + + # class names + if 'names' not in locals(): + names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)} + if names[0] == 'n01440764' and len(names) == 1000: # ImageNet + names = yaml_load(ROOT / 'data/ImageNet.yaml')['names'] # human-readable names + + self.__dict__.update(locals()) # assign all variables to self + + def forward(self, im, augment=False, visualize=False): + # YOLO MultiBackend inference + b, ch, h, w = im.shape # batch, channel, height, width + if self.fp16 and im.dtype != torch.float16: + im = im.half() # to FP16 + if self.nhwc: + im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3) + + if self.pt: # PyTorch + y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im) + elif self.jit: # TorchScript + y = self.model(im) + elif self.dnn: # ONNX OpenCV DNN + im = im.cpu().numpy() # torch to numpy + self.net.setInput(im) + y = self.net.forward() + elif self.onnx: # ONNX Runtime + im = im.cpu().numpy() # torch to numpy + y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im}) + elif self.xml: # OpenVINO + im = im.cpu().numpy() # FP32 + y = list(self.executable_network([im]).values()) + elif self.engine: # TensorRT + if self.dynamic and im.shape != self.bindings['images'].shape: + i = self.model.get_binding_index('images') + self.context.set_binding_shape(i, im.shape) # reshape if dynamic + self.bindings['images'] = self.bindings['images']._replace(shape=im.shape) + for name in self.output_names: + i = self.model.get_binding_index(name) + self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i))) + s = self.bindings['images'].shape + assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}" + self.binding_addrs['images'] = int(im.data_ptr()) + self.context.execute_v2(list(self.binding_addrs.values())) + y = [self.bindings[x].data for x in sorted(self.output_names)] + elif self.coreml: # CoreML + im = im.cpu().numpy() + im = Image.fromarray((im[0] * 255).astype('uint8')) + # im = im.resize((192, 320), Image.ANTIALIAS) + y = self.model.predict({'image': im}) # coordinates are xywh normalized + if 'confidence' in y: + box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels + conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float) + y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1) + else: + y = list(reversed(y.values())) # reversed for segmentation models (pred, proto) + elif self.paddle: # PaddlePaddle + im = im.cpu().numpy().astype(np.float32) + self.input_handle.copy_from_cpu(im) + self.predictor.run() + y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names] + elif self.triton: # NVIDIA Triton Inference Server + y = self.model(im) + else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) + im = im.cpu().numpy() + if self.saved_model: # SavedModel + y = self.model(im, training=False) if self.keras else self.model(im) + elif self.pb: # GraphDef + y = self.frozen_func(x=self.tf.constant(im)) + else: # Lite or Edge TPU + input = self.input_details[0] + int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model + if int8: + scale, zero_point = input['quantization'] + im = (im / scale + zero_point).astype(np.uint8) # de-scale + self.interpreter.set_tensor(input['index'], im) + self.interpreter.invoke() + y = [] + for output in self.output_details: + x = self.interpreter.get_tensor(output['index']) + if int8: + scale, zero_point = output['quantization'] + x = (x.astype(np.float32) - zero_point) * scale # re-scale + y.append(x) + y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y] + y[0][..., :4] *= [w, h, w, h] # xywh normalized to pixels + + if isinstance(y, (list, tuple)): + return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y] + else: + return self.from_numpy(y) + + def from_numpy(self, x): + return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x + + def warmup(self, imgsz=(1, 3, 640, 640)): + # Warmup model by running inference once + warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton + if any(warmup_types) and (self.device.type != 'cpu' or self.triton): + im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input + for _ in range(2 if self.jit else 1): # + self.forward(im) # warmup + + @staticmethod + def _model_type(p='path/to/model.pt'): + # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx + # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle] + from export import export_formats + from utils.downloads import is_url + sf = list(export_formats().Suffix) # export suffixes + if not is_url(p, check=False): + check_suffix(p, sf) # checks + url = urlparse(p) # if url may be Triton inference server + types = [s in Path(p).name for s in sf] + types[8] &= not types[9] # tflite &= not edgetpu + triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc]) + return types + [triton] + + @staticmethod + def _load_metadata(f=Path('path/to/meta.yaml')): + # Load metadata from meta.yaml if it exists + if f.exists(): + d = yaml_load(f) + return d['stride'], d['names'] # assign stride, names + return None, None + + +class AutoShape(nn.Module): + # YOLO input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS + conf = 0.25 # NMS confidence threshold + iou = 0.45 # NMS IoU threshold + agnostic = False # NMS class-agnostic + multi_label = False # NMS multiple labels per box + classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs + max_det = 1000 # maximum number of detections per image + amp = False # Automatic Mixed Precision (AMP) inference + + def __init__(self, model, verbose=True): + super().__init__() + if verbose: + LOGGER.info('Adding AutoShape... ') + copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes + self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance + self.pt = not self.dmb or model.pt # PyTorch model + self.model = model.eval() + if self.pt: + m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect() + m.inplace = False # Detect.inplace=False for safe multithread inference + m.export = True # do not output loss values + + def _apply(self, fn): + # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers + self = super()._apply(fn) + from models.yolo import Detect, Segment + if self.pt: + m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect() + if isinstance(m, (Detect, Segment)): + for k in 'stride', 'anchor_grid', 'stride_grid', 'grid': + x = getattr(m, k) + setattr(m, k, list(map(fn, x))) if isinstance(x, (list, tuple)) else setattr(m, k, fn(x)) + return self + + @smart_inference_mode() + def forward(self, ims, size=640, augment=False, profile=False): + # Inference from various sources. For size(height=640, width=1280), RGB images example inputs are: + # file: ims = 'data/images/zidane.jpg' # str or PosixPath + # URI: = 'https://ultralytics.com/images/zidane.jpg' + # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3) + # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3) + # numpy: = np.zeros((640,1280,3)) # HWC + # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values) + # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images + + dt = (Profile(), Profile(), Profile()) + with dt[0]: + if isinstance(size, int): # expand + size = (size, size) + p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device) # param + autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference + if isinstance(ims, torch.Tensor): # torch + with amp.autocast(autocast): + return self.model(ims.to(p.device).type_as(p), augment=augment) # inference + + # Pre-process + n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims]) # number, list of images + shape0, shape1, files = [], [], [] # image and inference shapes, filenames + for i, im in enumerate(ims): + f = f'image{i}' # filename + if isinstance(im, (str, Path)): # filename or uri + im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im + im = np.asarray(exif_transpose(im)) + elif isinstance(im, Image.Image): # PIL Image + im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f + files.append(Path(f).with_suffix('.jpg').name) + if im.shape[0] < 5: # image in CHW + im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) + im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR) # enforce 3ch input + s = im.shape[:2] # HWC + shape0.append(s) # image shape + g = max(size) / max(s) # gain + shape1.append([int(y * g) for y in s]) + ims[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update + shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)] # inf shape + x = [letterbox(im, shape1, auto=False)[0] for im in ims] # pad + x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW + x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32 + + with amp.autocast(autocast): + # Inference + with dt[1]: + y = self.model(x, augment=augment) # forward + + # Post-process + with dt[2]: + y = non_max_suppression(y if self.dmb else y[0], + self.conf, + self.iou, + self.classes, + self.agnostic, + self.multi_label, + max_det=self.max_det) # NMS + for i in range(n): + scale_boxes(shape1, y[i][:, :4], shape0[i]) + + return Detections(ims, y, files, dt, self.names, x.shape) + + +class Detections: + # YOLO detections class for inference results + def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None): + super().__init__() + d = pred[0].device # device + gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations + self.ims = ims # list of images as numpy arrays + self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) + self.names = names # class names + self.files = files # image filenames + self.times = times # profiling times + self.xyxy = pred # xyxy pixels + self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels + self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized + self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized + self.n = len(self.pred) # number of images (batch size) + self.t = tuple(x.t / self.n * 1E3 for x in times) # timestamps (ms) + self.s = tuple(shape) # inference BCHW shape + + def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')): + s, crops = '', [] + for i, (im, pred) in enumerate(zip(self.ims, self.pred)): + s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string + if pred.shape[0]: + for c in pred[:, -1].unique(): + n = (pred[:, -1] == c).sum() # detections per class + s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string + s = s.rstrip(', ') + if show or save or render or crop: + annotator = Annotator(im, example=str(self.names)) + for *box, conf, cls in reversed(pred): # xyxy, confidence, class + label = f'{self.names[int(cls)]} {conf:.2f}' + if crop: + file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None + crops.append({ + 'box': box, + 'conf': conf, + 'cls': cls, + 'label': label, + 'im': save_one_box(box, im, file=file, save=save)}) + else: # all others + annotator.box_label(box, label if labels else '', color=colors(cls)) + im = annotator.im + else: + s += '(no detections)' + + im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np + if show: + display(im) if is_notebook() else im.show(self.files[i]) + if save: + f = self.files[i] + im.save(save_dir / f) # save + if i == self.n - 1: + LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}") + if render: + self.ims[i] = np.asarray(im) + if pprint: + s = s.lstrip('\n') + return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t + if crop: + if save: + LOGGER.info(f'Saved results to {save_dir}\n') + return crops + + @TryExcept('Showing images is not supported in this environment') + def show(self, labels=True): + self._run(show=True, labels=labels) # show results + + def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False): + save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir + self._run(save=True, labels=labels, save_dir=save_dir) # save results + + def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False): + save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None + return self._run(crop=True, save=save, save_dir=save_dir) # crop results + + def render(self, labels=True): + self._run(render=True, labels=labels) # render results + return self.ims + + def pandas(self): + # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0]) + new = copy(self) # return copy + ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns + cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns + for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]): + a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update + setattr(new, k, [pd.DataFrame(x, columns=c) for x in a]) + return new + + def tolist(self): + # return a list of Detections objects, i.e. 'for result in results.tolist():' + r = range(self.n) # iterable + x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r] + # for d in x: + # for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: + # setattr(d, k, getattr(d, k)[0]) # pop out of list + return x + + def print(self): + LOGGER.info(self.__str__()) + + def __len__(self): # override len(results) + return self.n + + def __str__(self): # override print(results) + return self._run(pprint=True) # print results + + def __repr__(self): + return f'YOLO {self.__class__} instance\n' + self.__str__() + + +class Proto(nn.Module): + # YOLO mask Proto module for segmentation models + def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, number of masks + super().__init__() + self.cv1 = Conv(c1, c_, k=3) + self.upsample = nn.Upsample(scale_factor=2, mode='nearest') + self.cv2 = Conv(c_, c_, k=3) + self.cv3 = Conv(c_, c2) + + def forward(self, x): + return self.cv3(self.cv2(self.upsample(self.cv1(x)))) + + +class Classify(nn.Module): + # YOLO classification head, i.e. x(b,c1,20,20) to x(b,c2) + def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups + super().__init__() + c_ = 1280 # efficientnet_b0 size + self.conv = Conv(c1, c_, k, s, autopad(k, p), g) + self.pool = nn.AdaptiveAvgPool2d(1) # to x(b,c_,1,1) + self.drop = nn.Dropout(p=0.0, inplace=True) + self.linear = nn.Linear(c_, c2) # to x(b,c2) + + def forward(self, x): + if isinstance(x, list): + x = torch.cat(x, 1) + return self.linear(self.drop(self.pool(self.conv(x)).flatten(1))) diff --git a/models/detect/gelan-c.yaml b/models/detect/gelan-c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..78b41bc39389f633176eccb1f36b685e37ff4347 --- /dev/null +++ b/models/detect/gelan-c.yaml @@ -0,0 +1,80 @@ +# YOLOv9 + +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +#activation: nn.LeakyReLU(0.1) +#activation: nn.ReLU() + +# anchors +anchors: 3 + +# gelan backbone +backbone: + [ + # conv down + [-1, 1, Conv, [64, 3, 2]], # 0-P1/2 + + # conv down + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + + # elan-1 block + [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 2 + + # avg-conv down + [-1, 1, ADown, [256]], # 3-P3/8 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 4 + + # avg-conv down + [-1, 1, ADown, [512]], # 5-P4/16 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 6 + + # avg-conv down + [-1, 1, ADown, [512]], # 7-P5/32 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 8 + ] + +# gelan head +head: + [ + # elan-spp block + [-1, 1, SPPELAN, [512, 256]], # 9 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 12 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 15 (P3/8-small) + + # avg-conv-down merge + [-1, 1, ADown, [256]], + [[-1, 12], 1, Concat, [1]], # cat head P4 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 18 (P4/16-medium) + + # avg-conv-down merge + [-1, 1, ADown, [512]], + [[-1, 9], 1, Concat, [1]], # cat head P5 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 21 (P5/32-large) + + # detect + [[15, 18, 21], 1, DDetect, [nc]], # DDetect(P3, P4, P5) + ] diff --git a/models/detect/gelan-e.yaml b/models/detect/gelan-e.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0409bab70a6c697b0037212b44b0f25de7c1525 --- /dev/null +++ b/models/detect/gelan-e.yaml @@ -0,0 +1,121 @@ +# YOLOv9 + +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +#activation: nn.LeakyReLU(0.1) +#activation: nn.ReLU() + +# anchors +anchors: 3 + +# gelan backbone +backbone: + [ + [-1, 1, Silence, []], + + # conv down + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + + # conv down + [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 + + # elan-1 block + [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 3 + + # avg-conv down + [-1, 1, ADown, [256]], # 4-P3/8 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 5 + + # avg-conv down + [-1, 1, ADown, [512]], # 6-P4/16 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 7 + + # avg-conv down + [-1, 1, ADown, [1024]], # 8-P5/32 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 9 + + # routing + [1, 1, CBLinear, [[64]]], # 10 + [3, 1, CBLinear, [[64, 128]]], # 11 + [5, 1, CBLinear, [[64, 128, 256]]], # 12 + [7, 1, CBLinear, [[64, 128, 256, 512]]], # 13 + [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]], # 14 + + # conv down fuse + [0, 1, Conv, [64, 3, 2]], # 15-P1/2 + [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]], # 16 + + # conv down fuse + [-1, 1, Conv, [128, 3, 2]], # 17-P2/4 + [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]], # 18 + + # elan-1 block + [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 19 + + # avg-conv down fuse + [-1, 1, ADown, [256]], # 20-P3/8 + [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]], # 21 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 22 + + # avg-conv down fuse + [-1, 1, ADown, [512]], # 23-P4/16 + [[13, 14, -1], 1, CBFuse, [[3, 3]]], # 24 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 25 + + # avg-conv down fuse + [-1, 1, ADown, [1024]], # 26-P5/32 + [[14, -1], 1, CBFuse, [[4]]], # 27 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 28 + ] + +# gelan head +head: + [ + # elan-spp block + [28, 1, SPPELAN, [512, 256]], # 29 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 25], 1, Concat, [1]], # cat backbone P4 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 22], 1, Concat, [1]], # cat backbone P3 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35 (P3/8-small) + + # avg-conv-down merge + [-1, 1, ADown, [256]], + [[-1, 32], 1, Concat, [1]], # cat head P4 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 38 (P4/16-medium) + + # avg-conv-down merge + [-1, 1, ADown, [512]], + [[-1, 29], 1, Concat, [1]], # cat head P5 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 41 (P5/32-large) + + # detect + [[35, 38, 41], 1, DDetect, [nc]], # Detect(P3, P4, P5) + ] diff --git a/models/detect/gelan.yaml b/models/detect/gelan.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ecd54d47d0f73701699d8fdf554488564b640ff7 --- /dev/null +++ b/models/detect/gelan.yaml @@ -0,0 +1,80 @@ +# YOLOv9 + +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +#activation: nn.LeakyReLU(0.1) +#activation: nn.ReLU() + +# anchors +anchors: 3 + +# gelan backbone +backbone: + [ + # conv down + [-1, 1, Conv, [64, 3, 2]], # 0-P1/2 + + # conv down + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + + # elan-1 block + [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 2 + + # avg-conv down + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 4 + + # avg-conv down + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 6 + + # avg-conv down + [-1, 1, Conv, [512, 3, 2]], # 7-P5/32 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 8 + ] + +# gelan head +head: + [ + # elan-spp block + [-1, 1, SPPELAN, [512, 256]], # 9 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 12 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 15 (P3/8-small) + + # avg-conv-down merge + [-1, 1, Conv, [256, 3, 2]], + [[-1, 12], 1, Concat, [1]], # cat head P4 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 18 (P4/16-medium) + + # avg-conv-down merge + [-1, 1, Conv, [512, 3, 2]], + [[-1, 9], 1, Concat, [1]], # cat head P5 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 21 (P5/32-large) + + # detect + [[15, 18, 21], 1, DDetect, [nc]], # Detect(P3, P4, P5) + ] diff --git a/models/detect/yolov7-af.yaml b/models/detect/yolov7-af.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f739df15e6a19d4d4c6b5ed7e912fa6ef3064e59 --- /dev/null +++ b/models/detect/yolov7-af.yaml @@ -0,0 +1,137 @@ +# YOLOv7 + +# Parameters +nc: 80 # number of classes +depth_multiple: 1. # model depth multiple +width_multiple: 1. # layer channel multiple +anchors: 3 + +# YOLOv7 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [32, 3, 1]], # 0 + + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + [-1, 1, Conv, [64, 3, 1]], + + [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 + [-1, 1, Conv, [64, 1, 1]], + [-2, 1, Conv, [64, 1, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [256, 1, 1]], # 11 + + [-1, 1, MP, []], + [-1, 1, Conv, [128, 1, 1]], + [-3, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [128, 3, 2]], + [[-1, -3], 1, Concat, [1]], # 16-P3/8 + [-1, 1, Conv, [128, 1, 1]], + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [512, 1, 1]], # 24 + + [-1, 1, MP, []], + [-1, 1, Conv, [256, 1, 1]], + [-3, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 2]], + [[-1, -3], 1, Concat, [1]], # 29-P4/16 + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [1024, 1, 1]], # 37 + + [-1, 1, MP, []], + [-1, 1, Conv, [512, 1, 1]], + [-3, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [512, 3, 2]], + [[-1, -3], 1, Concat, [1]], # 42-P5/32 + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [1024, 1, 1]], # 50 + ] + +# yolov7 head +head: + [[-1, 1, SPPCSPC, [512]], # 51 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [37, 1, Conv, [256, 1, 1]], # route backbone P4 + [[-1, -2], 1, Concat, [1]], + + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [256, 1, 1]], # 63 + + [-1, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [24, 1, Conv, [128, 1, 1]], # route backbone P3 + [[-1, -2], 1, Concat, [1]], + + [-1, 1, Conv, [128, 1, 1]], + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [128, 1, 1]], # 75 + + [-1, 1, MP, []], + [-1, 1, Conv, [128, 1, 1]], + [-3, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [128, 3, 2]], + [[-1, -3, 63], 1, Concat, [1]], + + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [256, 1, 1]], # 88 + + [-1, 1, MP, []], + [-1, 1, Conv, [256, 1, 1]], + [-3, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 2]], + [[-1, -3, 51], 1, Concat, [1]], + + [-1, 1, Conv, [512, 1, 1]], + [-2, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [512, 1, 1]], # 101 + + [75, 1, Conv, [256, 3, 1]], + [88, 1, Conv, [512, 3, 1]], + [101, 1, Conv, [1024, 3, 1]], + + [[102, 103, 104], 1, Detect, [nc]], # Detect(P3, P4, P5) + ] diff --git a/models/detect/yolov9-c.yaml b/models/detect/yolov9-c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df8d31d2f1d37c97759caea5da4ab6a86d6d1a17 --- /dev/null +++ b/models/detect/yolov9-c.yaml @@ -0,0 +1,124 @@ +# YOLOv9 + +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +#activation: nn.LeakyReLU(0.1) +#activation: nn.ReLU() + +# anchors +anchors: 3 + +# YOLOv9 backbone +backbone: + [ + [-1, 1, Silence, []], + + # conv down + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + + # conv down + [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 + + # elan-1 block + [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 3 + + # avg-conv down + [-1, 1, ADown, [256]], # 4-P3/8 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 5 + + # avg-conv down + [-1, 1, ADown, [512]], # 6-P4/16 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 7 + + # avg-conv down + [-1, 1, ADown, [512]], # 8-P5/32 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 9 + ] + +# YOLOv9 head +head: + [ + # elan-spp block + [-1, 1, SPPELAN, [512, 256]], # 10 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 7], 1, Concat, [1]], # cat backbone P4 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 13 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 5], 1, Concat, [1]], # cat backbone P3 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 16 (P3/8-small) + + # avg-conv-down merge + [-1, 1, ADown, [256]], + [[-1, 13], 1, Concat, [1]], # cat head P4 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 19 (P4/16-medium) + + # avg-conv-down merge + [-1, 1, ADown, [512]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 22 (P5/32-large) + + + # multi-level reversible auxiliary branch + + # routing + [5, 1, CBLinear, [[256]]], # 23 + [7, 1, CBLinear, [[256, 512]]], # 24 + [9, 1, CBLinear, [[256, 512, 512]]], # 25 + + # conv down + [0, 1, Conv, [64, 3, 2]], # 26-P1/2 + + # conv down + [-1, 1, Conv, [128, 3, 2]], # 27-P2/4 + + # elan-1 block + [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 28 + + # avg-conv down fuse + [-1, 1, ADown, [256]], # 29-P3/8 + [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]], # 30 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 31 + + # avg-conv down fuse + [-1, 1, ADown, [512]], # 32-P4/16 + [[24, 25, -1], 1, CBFuse, [[1, 1]]], # 33 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 34 + + # avg-conv down fuse + [-1, 1, ADown, [512]], # 35-P5/32 + [[25, -1], 1, CBFuse, [[2]]], # 36 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 37 + + + + # detection head + + # detect + [[31, 34, 37, 16, 19, 22], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) + ] diff --git a/models/detect/yolov9-e.yaml b/models/detect/yolov9-e.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dcb122b61cf02eb3f599414f3b7431ddf7b9b898 --- /dev/null +++ b/models/detect/yolov9-e.yaml @@ -0,0 +1,144 @@ +# YOLOv9 + +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +#activation: nn.LeakyReLU(0.1) +#activation: nn.ReLU() + +# anchors +anchors: 3 + +# YOLOv9 backbone +backbone: + [ + [-1, 1, Silence, []], + + # conv down + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + + # conv down + [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 3 + + # avg-conv down + [-1, 1, ADown, [256]], # 4-P3/8 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 5 + + # avg-conv down + [-1, 1, ADown, [512]], # 6-P4/16 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 7 + + # avg-conv down + [-1, 1, ADown, [1024]], # 8-P5/32 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 9 + + # routing + [1, 1, CBLinear, [[64]]], # 10 + [3, 1, CBLinear, [[64, 128]]], # 11 + [5, 1, CBLinear, [[64, 128, 256]]], # 12 + [7, 1, CBLinear, [[64, 128, 256, 512]]], # 13 + [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]], # 14 + + # conv down + [0, 1, Conv, [64, 3, 2]], # 15-P1/2 + [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]], # 16 + + # conv down + [-1, 1, Conv, [128, 3, 2]], # 17-P2/4 + [[11, 12, 13, 14, -1], 1, CBFuse, [[1, 1, 1, 1]]], # 18 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [256, 128, 64, 2]], # 19 + + # avg-conv down fuse + [-1, 1, ADown, [256]], # 20-P3/8 + [[12, 13, 14, -1], 1, CBFuse, [[2, 2, 2]]], # 21 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [512, 256, 128, 2]], # 22 + + # avg-conv down fuse + [-1, 1, ADown, [512]], # 23-P4/16 + [[13, 14, -1], 1, CBFuse, [[3, 3]]], # 24 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 25 + + # avg-conv down fuse + [-1, 1, ADown, [1024]], # 26-P5/32 + [[14, -1], 1, CBFuse, [[4]]], # 27 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]], # 28 + ] + +# YOLOv9 head +head: + [ + # multi-level auxiliary branch + + # elan-spp block + [9, 1, SPPELAN, [512, 256]], # 29 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 7], 1, Concat, [1]], # cat backbone P4 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 32 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 5], 1, Concat, [1]], # cat backbone P3 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 35 + + + + # main branch + + # elan-spp block + [28, 1, SPPELAN, [512, 256]], # 36 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 25], 1, Concat, [1]], # cat backbone P4 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 39 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 22], 1, Concat, [1]], # cat backbone P3 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [256, 256, 128, 2]], # 42 (P3/8-small) + + # avg-conv-down merge + [-1, 1, ADown, [256]], + [[-1, 39], 1, Concat, [1]], # cat head P4 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 2]], # 45 (P4/16-medium) + + # avg-conv-down merge + [-1, 1, ADown, [512]], + [[-1, 36], 1, Concat, [1]], # cat head P5 + + # csp-elan block + [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]], # 48 (P5/32-large) + + # detect + [[35, 32, 29, 42, 45, 48], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) + ] diff --git a/models/detect/yolov9.yaml b/models/detect/yolov9.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4075a7e395c140d875f1aac9d7564b9f58cdf2da --- /dev/null +++ b/models/detect/yolov9.yaml @@ -0,0 +1,117 @@ +# YOLOv9 + +# parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +#activation: nn.LeakyReLU(0.1) +#activation: nn.ReLU() + +# anchors +anchors: 3 + +# YOLOv9 backbone +backbone: + [ + [-1, 1, Silence, []], + + # conv down + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + + # conv down + [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 + + # elan-1 block + [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 3 + + # conv down + [-1, 1, Conv, [256, 3, 2]], # 4-P3/8 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 5 + + # conv down + [-1, 1, Conv, [512, 3, 2]], # 6-P4/16 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 7 + + # conv down + [-1, 1, Conv, [512, 3, 2]], # 8-P5/32 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 9 + ] + +# YOLOv9 head +head: + [ + # elan-spp block + [-1, 1, SPPELAN, [512, 256]], # 10 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 7], 1, Concat, [1]], # cat backbone P4 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 13 + + # up-concat merge + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 5], 1, Concat, [1]], # cat backbone P3 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [256, 256, 128, 1]], # 16 (P3/8-small) + + # conv-down merge + [-1, 1, Conv, [256, 3, 2]], + [[-1, 13], 1, Concat, [1]], # cat head P4 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 19 (P4/16-medium) + + # conv-down merge + [-1, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 22 (P5/32-large) + + # routing + [5, 1, CBLinear, [[256]]], # 23 + [7, 1, CBLinear, [[256, 512]]], # 24 + [9, 1, CBLinear, [[256, 512, 512]]], # 25 + + # conv down + [0, 1, Conv, [64, 3, 2]], # 26-P1/2 + + # conv down + [-1, 1, Conv, [128, 3, 2]], # 27-P2/4 + + # elan-1 block + [-1, 1, RepNCSPELAN4, [256, 128, 64, 1]], # 28 + + # conv down fuse + [-1, 1, Conv, [256, 3, 2]], # 29-P3/8 + [[23, 24, 25, -1], 1, CBFuse, [[0, 0, 0]]], # 30 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 256, 128, 1]], # 31 + + # conv down fuse + [-1, 1, Conv, [512, 3, 2]], # 32-P4/16 + [[24, 25, -1], 1, CBFuse, [[1, 1]]], # 33 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 34 + + # conv down fuse + [-1, 1, Conv, [512, 3, 2]], # 35-P5/32 + [[25, -1], 1, CBFuse, [[2]]], # 36 + + # elan-2 block + [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]], # 37 + + # detect + [[31, 34, 37, 16, 19, 22], 1, DualDDetect, [nc]], # DualDDetect(A3, A4, A5, P3, P4, P5) + ] diff --git a/models/experimental.py b/models/experimental.py new file mode 100644 index 0000000000000000000000000000000000000000..ae087c50e02eb2de5692f90d7c12b33c5b76c635 --- /dev/null +++ b/models/experimental.py @@ -0,0 +1,107 @@ +import math + +import numpy as np +import torch +import torch.nn as nn + +from utils.downloads import attempt_download + + +class Sum(nn.Module): + # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 + def __init__(self, n, weight=False): # n: number of inputs + super().__init__() + self.weight = weight # apply weights boolean + self.iter = range(n - 1) # iter object + if weight: + self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights + + def forward(self, x): + y = x[0] # no weight + if self.weight: + w = torch.sigmoid(self.w) * 2 + for i in self.iter: + y = y + x[i + 1] * w[i] + else: + for i in self.iter: + y = y + x[i + 1] + return y + + +class MixConv2d(nn.Module): + # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595 + def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch_out, kernel, stride, ch_strategy + super().__init__() + n = len(k) # number of convolutions + if equal_ch: # equal c_ per group + i = torch.linspace(0, n - 1E-6, c2).floor() # c2 indices + c_ = [(i == g).sum() for g in range(n)] # intermediate channels + else: # equal weight.numel() per group + b = [c2] + [0] * n + a = np.eye(n + 1, n, k=-1) + a -= np.roll(a, 1, axis=1) + a *= np.array(k) ** 2 + a[0] = 1 + c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b + + self.m = nn.ModuleList([ + nn.Conv2d(c1, int(c_), k, s, k // 2, groups=math.gcd(c1, int(c_)), bias=False) for k, c_ in zip(k, c_)]) + self.bn = nn.BatchNorm2d(c2) + self.act = nn.SiLU() + + def forward(self, x): + return self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) + + +class Ensemble(nn.ModuleList): + # Ensemble of models + def __init__(self): + super().__init__() + + def forward(self, x, augment=False, profile=False, visualize=False): + y = [module(x, augment, profile, visualize)[0] for module in self] + # y = torch.stack(y).max(0)[0] # max ensemble + # y = torch.stack(y).mean(0) # mean ensemble + y = torch.cat(y, 1) # nms ensemble + return y, None # inference, train output + + +def attempt_load(weights, device=None, inplace=True, fuse=True): + # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a + from models.yolo import Detect, Model + + model = Ensemble() + for w in weights if isinstance(weights, list) else [weights]: + ckpt = torch.load(attempt_download(w), map_location='cpu') # load + ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model + + # Model compatibility updates + if not hasattr(ckpt, 'stride'): + ckpt.stride = torch.tensor([32.]) + if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)): + ckpt.names = dict(enumerate(ckpt.names)) # convert to dict + + model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval()) # model in eval mode + + # Module compatibility updates + for m in model.modules(): + t = type(m) + if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model): + m.inplace = inplace # torch 1.7.0 compatibility + # if t is Detect and not isinstance(m.anchor_grid, list): + # delattr(m, 'anchor_grid') + # setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl) + elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'): + m.recompute_scale_factor = None # torch 1.11.0 compatibility + + # Return model + if len(model) == 1: + return model[-1] + + # Return detection ensemble + print(f'Ensemble created with {weights}\n') + for k in 'names', 'nc', 'yaml': + setattr(model, k, getattr(model[0], k)) + model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride + assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}' + return model diff --git a/models/hub/anchors.yaml b/models/hub/anchors.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65e85cf4764a30aa98abcc7f9daefdebbe94b29e --- /dev/null +++ b/models/hub/anchors.yaml @@ -0,0 +1,59 @@ +# YOLOv3 & YOLOv5 +# Default anchors for COCO data + + +# P5 ------------------------------------------------------------------------------------------------------------------- +# P5-640: +anchors_p5_640: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + + +# P6 ------------------------------------------------------------------------------------------------------------------- +# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387 +anchors_p6_640: + - [9,11, 21,19, 17,41] # P3/8 + - [43,32, 39,70, 86,64] # P4/16 + - [65,131, 134,130, 120,265] # P5/32 + - [282,180, 247,354, 512,387] # P6/64 + +# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792 +anchors_p6_1280: + - [19,27, 44,40, 38,94] # P3/8 + - [96,68, 86,152, 180,137] # P4/16 + - [140,301, 303,264, 238,542] # P5/32 + - [436,615, 739,380, 925,792] # P6/64 + +# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187 +anchors_p6_1920: + - [28,41, 67,59, 57,141] # P3/8 + - [144,103, 129,227, 270,205] # P4/16 + - [209,452, 455,396, 358,812] # P5/32 + - [653,922, 1109,570, 1387,1187] # P6/64 + + +# P7 ------------------------------------------------------------------------------------------------------------------- +# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372 +anchors_p7_640: + - [11,11, 13,30, 29,20] # P3/8 + - [30,46, 61,38, 39,92] # P4/16 + - [78,80, 146,66, 79,163] # P5/32 + - [149,150, 321,143, 157,303] # P6/64 + - [257,402, 359,290, 524,372] # P7/128 + +# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818 +anchors_p7_1280: + - [19,22, 54,36, 32,77] # P3/8 + - [70,83, 138,71, 75,173] # P4/16 + - [165,159, 148,334, 375,151] # P5/32 + - [334,317, 251,626, 499,474] # P6/64 + - [750,326, 534,814, 1079,818] # P7/128 + +# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227 +anchors_p7_1920: + - [29,34, 81,55, 47,115] # P3/8 + - [105,124, 207,107, 113,259] # P4/16 + - [247,238, 222,500, 563,227] # P5/32 + - [501,476, 376,939, 749,711] # P6/64 + - [1126,489, 801,1222, 1618,1227] # P7/128 diff --git a/models/hub/yolov3-spp.yaml b/models/hub/yolov3-spp.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6fb1c0b72a99d7ef26211060045d378e814fac50 --- /dev/null +++ b/models/hub/yolov3-spp.yaml @@ -0,0 +1,51 @@ +# YOLOv3 + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# darknet53 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [32, 3, 1]], # 0 + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + [-1, 1, Bottleneck, [64]], + [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 + [-1, 2, Bottleneck, [128]], + [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 + [-1, 8, Bottleneck, [256]], + [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 + [-1, 8, Bottleneck, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 + [-1, 4, Bottleneck, [1024]], # 10 + ] + +# YOLOv3-SPP head +head: + [[-1, 1, Bottleneck, [1024, False]], + [-1, 1, SPP, [512, [5, 9, 13]]], + [-1, 1, Conv, [1024, 3, 1]], + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) + + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P4 + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) + + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P3 + [-1, 1, Bottleneck, [256, False]], + [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) + + [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/models/hub/yolov3-tiny.yaml b/models/hub/yolov3-tiny.yaml new file mode 100644 index 0000000000000000000000000000000000000000..47372e09ae99ed6f0d494cc7811568c109b3f721 --- /dev/null +++ b/models/hub/yolov3-tiny.yaml @@ -0,0 +1,41 @@ +# YOLOv3 + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +anchors: + - [10,14, 23,27, 37,58] # P4/16 + - [81,82, 135,169, 344,319] # P5/32 + +# YOLOv3-tiny backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [16, 3, 1]], # 0 + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 + [-1, 1, Conv, [32, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 + [-1, 1, Conv, [512, 3, 1]], + [-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11 + [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 + ] + +# YOLOv3-tiny head +head: + [[-1, 1, Conv, [1024, 3, 1]], + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) + + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P4 + [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) + + [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) + ] diff --git a/models/hub/yolov3.yaml b/models/hub/yolov3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3ebd78f5eb66243ca6f0235904d4df9ef05d5f36 --- /dev/null +++ b/models/hub/yolov3.yaml @@ -0,0 +1,51 @@ +# YOLOv3 + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +anchors: + - [10,13, 16,30, 33,23] # P3/8 + - [30,61, 62,45, 59,119] # P4/16 + - [116,90, 156,198, 373,326] # P5/32 + +# darknet53 backbone +backbone: + # [from, number, module, args] + [[-1, 1, Conv, [32, 3, 1]], # 0 + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + [-1, 1, Bottleneck, [64]], + [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 + [-1, 2, Bottleneck, [128]], + [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 + [-1, 8, Bottleneck, [256]], + [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 + [-1, 8, Bottleneck, [512]], + [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 + [-1, 4, Bottleneck, [1024]], # 10 + ] + +# YOLOv3 head +head: + [[-1, 1, Bottleneck, [1024, False]], + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [1024, 3, 1]], + [-1, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) + + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 8], 1, Concat, [1]], # cat backbone P4 + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Bottleneck, [512, False]], + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) + + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P3 + [-1, 1, Bottleneck, [256, False]], + [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) + + [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + ] diff --git a/models/panoptic/yolov7-af-pan.yaml b/models/panoptic/yolov7-af-pan.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a9bed1d1dda4bfd0415731a999b99c946e35e056 --- /dev/null +++ b/models/panoptic/yolov7-af-pan.yaml @@ -0,0 +1,137 @@ +# YOLOv7 + +# Parameters +nc: 80 # number of classes +sem_nc: 93 # number of stuff classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +anchors: 3 + +# YOLOv7 backbone +backbone: + [[-1, 1, Conv, [32, 3, 1]], # 0 + + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + [-1, 1, Conv, [64, 3, 1]], + + [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 + [-1, 1, Conv, [64, 1, 1]], + [-2, 1, Conv, [64, 1, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [256, 1, 1]], # 11 + + [-1, 1, MP, []], + [-1, 1, Conv, [128, 1, 1]], + [-3, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [128, 3, 2]], + [[-1, -3], 1, Concat, [1]], # 16-P3/8 + [-1, 1, Conv, [128, 1, 1]], + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [512, 1, 1]], # 24 + + [-1, 1, MP, []], + [-1, 1, Conv, [256, 1, 1]], + [-3, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 2]], + [[-1, -3], 1, Concat, [1]], # 29-P4/16 + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [1024, 1, 1]], # 37 + + [-1, 1, MP, []], + [-1, 1, Conv, [512, 1, 1]], + [-3, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [512, 3, 2]], + [[-1, -3], 1, Concat, [1]], # 42-P5/32 + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [1024, 1, 1]], # 50 + ] + +# yolov7 head +head: + [[-1, 1, SPPCSPC, [512]], # 51 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [37, 1, Conv, [256, 1, 1]], # route backbone P4 + [[-1, -2], 1, Concat, [1]], + + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [256, 1, 1]], # 63 + + [-1, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [24, 1, Conv, [128, 1, 1]], # route backbone P3 + [[-1, -2], 1, Concat, [1]], + + [-1, 1, Conv, [128, 1, 1]], + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [128, 1, 1]], # 75 + + [-1, 1, MP, []], + [-1, 1, Conv, [128, 1, 1]], + [-3, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [128, 3, 2]], + [[-1, -3, 63], 1, Concat, [1]], + + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [256, 1, 1]], # 88 + + [-1, 1, MP, []], + [-1, 1, Conv, [256, 1, 1]], + [-3, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 2]], + [[-1, -3, 51], 1, Concat, [1]], + + [-1, 1, Conv, [512, 1, 1]], + [-2, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [512, 1, 1]], # 101 + + [75, 1, Conv, [256, 3, 1]], + [88, 1, Conv, [512, 3, 1]], + [101, 1, Conv, [1024, 3, 1]], + + [[102, 103, 104], 1, Panoptic, [nc, 93, 32, 256]], # Panoptic(P3, P4, P5) + ] diff --git a/models/segment/yolov7-af-seg.yaml b/models/segment/yolov7-af-seg.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e4b61f7b65377ea9ec40595c9034454be431341 --- /dev/null +++ b/models/segment/yolov7-af-seg.yaml @@ -0,0 +1,136 @@ +# YOLOv7 + +# Parameters +nc: 80 # number of classes +depth_multiple: 1.0 # model depth multiple +width_multiple: 1.0 # layer channel multiple +anchors: 3 + +# YOLOv7 backbone +backbone: + [[-1, 1, Conv, [32, 3, 1]], # 0 + + [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 + [-1, 1, Conv, [64, 3, 1]], + + [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 + [-1, 1, Conv, [64, 1, 1]], + [-2, 1, Conv, [64, 1, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [256, 1, 1]], # 11 + + [-1, 1, MP, []], + [-1, 1, Conv, [128, 1, 1]], + [-3, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [128, 3, 2]], + [[-1, -3], 1, Concat, [1]], # 16-P3/8 + [-1, 1, Conv, [128, 1, 1]], + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [512, 1, 1]], # 24 + + [-1, 1, MP, []], + [-1, 1, Conv, [256, 1, 1]], + [-3, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 2]], + [[-1, -3], 1, Concat, [1]], # 29-P4/16 + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [1024, 1, 1]], # 37 + + [-1, 1, MP, []], + [-1, 1, Conv, [512, 1, 1]], + [-3, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [512, 3, 2]], + [[-1, -3], 1, Concat, [1]], # 42-P5/32 + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [[-1, -3, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [1024, 1, 1]], # 50 + ] + +# yolov7 head +head: + [[-1, 1, SPPCSPC, [512]], # 51 + + [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [37, 1, Conv, [256, 1, 1]], # route backbone P4 + [[-1, -2], 1, Concat, [1]], + + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [256, 1, 1]], # 63 + + [-1, 1, Conv, [128, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [24, 1, Conv, [128, 1, 1]], # route backbone P3 + [[-1, -2], 1, Concat, [1]], + + [-1, 1, Conv, [128, 1, 1]], + [-2, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [-1, 1, Conv, [64, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [128, 1, 1]], # 75 + + [-1, 1, MP, []], + [-1, 1, Conv, [128, 1, 1]], + [-3, 1, Conv, [128, 1, 1]], + [-1, 1, Conv, [128, 3, 2]], + [[-1, -3, 63], 1, Concat, [1]], + + [-1, 1, Conv, [256, 1, 1]], + [-2, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [-1, 1, Conv, [128, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [256, 1, 1]], # 88 + + [-1, 1, MP, []], + [-1, 1, Conv, [256, 1, 1]], + [-3, 1, Conv, [256, 1, 1]], + [-1, 1, Conv, [256, 3, 2]], + [[-1, -3, 51], 1, Concat, [1]], + + [-1, 1, Conv, [512, 1, 1]], + [-2, 1, Conv, [512, 1, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [-1, 1, Conv, [256, 3, 1]], + [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], + [-1, 1, Conv, [512, 1, 1]], # 101 + + [75, 1, Conv, [256, 3, 1]], + [88, 1, Conv, [512, 3, 1]], + [101, 1, Conv, [1024, 3, 1]], + + [[102, 103, 104], 1, Segment, [nc, 32, 256]], # Segment(P3, P4, P5) + ] diff --git a/models/tf.py b/models/tf.py new file mode 100644 index 0000000000000000000000000000000000000000..897efafefebb6e176afd1f001ea14d9b922727e9 --- /dev/null +++ b/models/tf.py @@ -0,0 +1,596 @@ +import argparse +import sys +from copy import deepcopy +from pathlib import Path + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLO root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +# ROOT = ROOT.relative_to(Path.cwd()) # relative + +import numpy as np +import tensorflow as tf +import torch +import torch.nn as nn +from tensorflow import keras + +from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv, + DWConvTranspose2d, Focus, autopad) +from models.experimental import MixConv2d, attempt_load +from models.yolo import Detect, Segment +from utils.activations import SiLU +from utils.general import LOGGER, make_divisible, print_args + + +class TFBN(keras.layers.Layer): + # TensorFlow BatchNormalization wrapper + def __init__(self, w=None): + super().__init__() + self.bn = keras.layers.BatchNormalization( + beta_initializer=keras.initializers.Constant(w.bias.numpy()), + gamma_initializer=keras.initializers.Constant(w.weight.numpy()), + moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()), + moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()), + epsilon=w.eps) + + def call(self, inputs): + return self.bn(inputs) + + +class TFPad(keras.layers.Layer): + # Pad inputs in spatial dimensions 1 and 2 + def __init__(self, pad): + super().__init__() + if isinstance(pad, int): + self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]]) + else: # tuple/list + self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]]) + + def call(self, inputs): + return tf.pad(inputs, self.pad, mode='constant', constant_values=0) + + +class TFConv(keras.layers.Layer): + # Standard convolution + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): + # ch_in, ch_out, weights, kernel, stride, padding, groups + super().__init__() + assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument" + # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding) + # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch + conv = keras.layers.Conv2D( + filters=c2, + kernel_size=k, + strides=s, + padding='SAME' if s == 1 else 'VALID', + use_bias=not hasattr(w, 'bn'), + kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()), + bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy())) + self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv]) + self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity + self.act = activations(w.act) if act else tf.identity + + def call(self, inputs): + return self.act(self.bn(self.conv(inputs))) + + +class TFDWConv(keras.layers.Layer): + # Depthwise convolution + def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None): + # ch_in, ch_out, weights, kernel, stride, padding, groups + super().__init__() + assert c2 % c1 == 0, f'TFDWConv() output={c2} must be a multiple of input={c1} channels' + conv = keras.layers.DepthwiseConv2D( + kernel_size=k, + depth_multiplier=c2 // c1, + strides=s, + padding='SAME' if s == 1 else 'VALID', + use_bias=not hasattr(w, 'bn'), + depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()), + bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy())) + self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv]) + self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity + self.act = activations(w.act) if act else tf.identity + + def call(self, inputs): + return self.act(self.bn(self.conv(inputs))) + + +class TFDWConvTranspose2d(keras.layers.Layer): + # Depthwise ConvTranspose2d + def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None): + # ch_in, ch_out, weights, kernel, stride, padding, groups + super().__init__() + assert c1 == c2, f'TFDWConv() output={c2} must be equal to input={c1} channels' + assert k == 4 and p1 == 1, 'TFDWConv() only valid for k=4 and p1=1' + weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy() + self.c1 = c1 + self.conv = [ + keras.layers.Conv2DTranspose(filters=1, + kernel_size=k, + strides=s, + padding='VALID', + output_padding=p2, + use_bias=True, + kernel_initializer=keras.initializers.Constant(weight[..., i:i + 1]), + bias_initializer=keras.initializers.Constant(bias[i])) for i in range(c1)] + + def call(self, inputs): + return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1] + + +class TFFocus(keras.layers.Layer): + # Focus wh information into c-space + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None): + # ch_in, ch_out, kernel, stride, padding, groups + super().__init__() + self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv) + + def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c) + # inputs = inputs / 255 # normalize 0-255 to 0-1 + inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]] + return self.conv(tf.concat(inputs, 3)) + + +class TFBottleneck(keras.layers.Layer): + # Standard bottleneck + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_in, ch_out, shortcut, groups, expansion + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) + self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2) + self.add = shortcut and c1 == c2 + + def call(self, inputs): + return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs)) + + +class TFCrossConv(keras.layers.Layer): + # Cross Convolution + def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None): + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1) + self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2) + self.add = shortcut and c1 == c2 + + def call(self, inputs): + return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs)) + + +class TFConv2d(keras.layers.Layer): + # Substitution for PyTorch nn.Conv2D + def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None): + super().__init__() + assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument" + self.conv = keras.layers.Conv2D(filters=c2, + kernel_size=k, + strides=s, + padding='VALID', + use_bias=bias, + kernel_initializer=keras.initializers.Constant( + w.weight.permute(2, 3, 1, 0).numpy()), + bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None) + + def call(self, inputs): + return self.conv(inputs) + + +class TFBottleneckCSP(keras.layers.Layer): + # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): + # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) + self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2) + self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3) + self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4) + self.bn = TFBN(w.bn) + self.act = lambda x: keras.activations.swish(x) + self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)]) + + def call(self, inputs): + y1 = self.cv3(self.m(self.cv1(inputs))) + y2 = self.cv2(inputs) + return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3)))) + + +class TFC3(keras.layers.Layer): + # CSP Bottleneck with 3 convolutions + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): + # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) + self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2) + self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3) + self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)]) + + def call(self, inputs): + return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3)) + + +class TFC3x(keras.layers.Layer): + # 3 module with cross-convolutions + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None): + # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) + self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2) + self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3) + self.m = keras.Sequential([ + TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)]) + + def call(self, inputs): + return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3)) + + +class TFSPP(keras.layers.Layer): + # Spatial pyramid pooling layer used in YOLOv3-SPP + def __init__(self, c1, c2, k=(5, 9, 13), w=None): + super().__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) + self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2) + self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k] + + def call(self, inputs): + x = self.cv1(inputs) + return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3)) + + +class TFSPPF(keras.layers.Layer): + # Spatial pyramid pooling-Fast layer + def __init__(self, c1, c2, k=5, w=None): + super().__init__() + c_ = c1 // 2 # hidden channels + self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1) + self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2) + self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME') + + def call(self, inputs): + x = self.cv1(inputs) + y1 = self.m(x) + y2 = self.m(y1) + return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3)) + + +class TFDetect(keras.layers.Layer): + # TF YOLO Detect layer + def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None): # detection layer + super().__init__() + self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32) + self.nc = nc # number of classes + self.no = nc + 5 # number of outputs per anchor + self.nl = len(anchors) # number of detection layers + self.na = len(anchors[0]) // 2 # number of anchors + self.grid = [tf.zeros(1)] * self.nl # init grid + self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32) + self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2]) + self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] + self.training = False # set to False after building model + self.imgsz = imgsz + for i in range(self.nl): + ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i] + self.grid[i] = self._make_grid(nx, ny) + + def call(self, inputs): + z = [] # inference output + x = [] + for i in range(self.nl): + x.append(self.m[i](inputs[i])) + # x(bs,20,20,255) to x(bs,3,20,20,85) + ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i] + x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no]) + + if not self.training: # inference + y = x[i] + grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5 + anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4 + xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i] # xy + wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid + # Normalize xywh to 0-1 to reduce calibration error + xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) + wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32) + y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1) + z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no])) + + return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),) + + @staticmethod + def _make_grid(nx=20, ny=20): + # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) + # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny)) + return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32) + + +class TFSegment(TFDetect): + # YOLO Segment head for segmentation models + def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None): + super().__init__(nc, anchors, ch, imgsz, w) + self.nm = nm # number of masks + self.npr = npr # number of protos + self.no = 5 + nc + self.nm # number of outputs per anchor + self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)] # output conv + self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto) # protos + self.detect = TFDetect.call + + def call(self, x): + p = self.proto(x[0]) + # p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0])) # (optional) full-size protos + p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160) + x = self.detect(self, x) + return (x, p) if self.training else (x[0], p) + + +class TFProto(keras.layers.Layer): + + def __init__(self, c1, c_=256, c2=32, w=None): + super().__init__() + self.cv1 = TFConv(c1, c_, k=3, w=w.cv1) + self.upsample = TFUpsample(None, scale_factor=2, mode='nearest') + self.cv2 = TFConv(c_, c_, k=3, w=w.cv2) + self.cv3 = TFConv(c_, c2, w=w.cv3) + + def call(self, inputs): + return self.cv3(self.cv2(self.upsample(self.cv1(inputs)))) + + +class TFUpsample(keras.layers.Layer): + # TF version of torch.nn.Upsample() + def __init__(self, size, scale_factor, mode, w=None): # warning: all arguments needed including 'w' + super().__init__() + assert scale_factor % 2 == 0, "scale_factor must be multiple of 2" + self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode) + # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode) + # with default arguments: align_corners=False, half_pixel_centers=False + # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x, + # size=(x.shape[1] * 2, x.shape[2] * 2)) + + def call(self, inputs): + return self.upsample(inputs) + + +class TFConcat(keras.layers.Layer): + # TF version of torch.concat() + def __init__(self, dimension=1, w=None): + super().__init__() + assert dimension == 1, "convert only NCHW to NHWC concat" + self.d = 3 + + def call(self, inputs): + return tf.concat(inputs, self.d) + + +def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3) + LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") + anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] + na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors + no = na * (nc + 5) # number of outputs = anchors * (classes + 5) + + layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out + for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args + m_str = m + m = eval(m) if isinstance(m, str) else m # eval strings + for j, a in enumerate(args): + try: + args[j] = eval(a) if isinstance(a, str) else a # eval strings + except NameError: + pass + + n = max(round(n * gd), 1) if n > 1 else n # depth gain + if m in [ + nn.Conv2d, Conv, DWConv, DWConvTranspose2d, Bottleneck, SPP, SPPF, MixConv2d, Focus, CrossConv, + BottleneckCSP, C3, C3x]: + c1, c2 = ch[f], args[0] + c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 + + args = [c1, c2, *args[1:]] + if m in [BottleneckCSP, C3, C3x]: + args.insert(2, n) + n = 1 + elif m is nn.BatchNorm2d: + args = [ch[f]] + elif m is Concat: + c2 = sum(ch[-1 if x == -1 else x + 1] for x in f) + elif m in [Detect, Segment]: + args.append([ch[x + 1] for x in f]) + if isinstance(args[1], int): # number of anchors + args[1] = [list(range(args[1] * 2))] * len(f) + if m is Segment: + args[3] = make_divisible(args[3] * gw, 8) + args.append(imgsz) + else: + c2 = ch[f] + + tf_m = eval('TF' + m_str.replace('nn.', '')) + m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \ + else tf_m(*args, w=model.model[i]) # module + + torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module + t = str(m)[8:-2].replace('__main__.', '') # module type + np = sum(x.numel() for x in torch_m_.parameters()) # number params + m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params + LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10} {t:<40}{str(args):<30}') # print + save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist + layers.append(m_) + ch.append(c2) + return keras.Sequential(layers), sorted(save) + + +class TFModel: + # TF YOLO model + def __init__(self, cfg='yolo.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)): # model, channels, classes + super().__init__() + if isinstance(cfg, dict): + self.yaml = cfg # model dict + else: # is *.yaml + import yaml # for torch hub + self.yaml_file = Path(cfg).name + with open(cfg) as f: + self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict + + # Define model + if nc and nc != self.yaml['nc']: + LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}") + self.yaml['nc'] = nc # override yaml value + self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz) + + def predict(self, + inputs, + tf_nms=False, + agnostic_nms=False, + topk_per_class=100, + topk_all=100, + iou_thres=0.45, + conf_thres=0.25): + y = [] # outputs + x = inputs + for m in self.model.layers: + if m.f != -1: # if not from previous layer + x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers + + x = m(x) # run + y.append(x if m.i in self.savelist else None) # save output + + # Add TensorFlow NMS + if tf_nms: + boxes = self._xywh2xyxy(x[0][..., :4]) + probs = x[0][:, :, 4:5] + classes = x[0][:, :, 5:] + scores = probs * classes + if agnostic_nms: + nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres) + else: + boxes = tf.expand_dims(boxes, 2) + nms = tf.image.combined_non_max_suppression(boxes, + scores, + topk_per_class, + topk_all, + iou_thres, + conf_thres, + clip_boxes=False) + return (nms,) + return x # output [1,6300,85] = [xywh, conf, class0, class1, ...] + # x = x[0] # [x(1,6300,85), ...] to x(6300,85) + # xywh = x[..., :4] # x(6300,4) boxes + # conf = x[..., 4:5] # x(6300,1) confidences + # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1)) # x(6300,1) classes + # return tf.concat([conf, cls, xywh], 1) + + @staticmethod + def _xywh2xyxy(xywh): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1) + return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1) + + +class AgnosticNMS(keras.layers.Layer): + # TF Agnostic NMS + def call(self, input, topk_all, iou_thres, conf_thres): + # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450 + return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres), + input, + fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32), + name='agnostic_nms') + + @staticmethod + def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnostic NMS + boxes, classes, scores = x + class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32) + scores_inp = tf.reduce_max(scores, -1) + selected_inds = tf.image.non_max_suppression(boxes, + scores_inp, + max_output_size=topk_all, + iou_threshold=iou_thres, + score_threshold=conf_thres) + selected_boxes = tf.gather(boxes, selected_inds) + padded_boxes = tf.pad(selected_boxes, + paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]], + mode="CONSTANT", + constant_values=0.0) + selected_scores = tf.gather(scores_inp, selected_inds) + padded_scores = tf.pad(selected_scores, + paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]], + mode="CONSTANT", + constant_values=-1.0) + selected_classes = tf.gather(class_inds, selected_inds) + padded_classes = tf.pad(selected_classes, + paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]], + mode="CONSTANT", + constant_values=-1.0) + valid_detections = tf.shape(selected_inds)[0] + return padded_boxes, padded_scores, padded_classes, valid_detections + + +def activations(act=nn.SiLU): + # Returns TF activation from input PyTorch activation + if isinstance(act, nn.LeakyReLU): + return lambda x: keras.activations.relu(x, alpha=0.1) + elif isinstance(act, nn.Hardswish): + return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667 + elif isinstance(act, (nn.SiLU, SiLU)): + return lambda x: keras.activations.swish(x) + else: + raise Exception(f'no matching TensorFlow activation found for PyTorch activation {act}') + + +def representative_dataset_gen(dataset, ncalib=100): + # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays + for n, (path, img, im0s, vid_cap, string) in enumerate(dataset): + im = np.transpose(img, [1, 2, 0]) + im = np.expand_dims(im, axis=0).astype(np.float32) + im /= 255 + yield [im] + if n >= ncalib: + break + + +def run( + weights=ROOT / 'yolo.pt', # weights path + imgsz=(640, 640), # inference size h,w + batch_size=1, # batch size + dynamic=False, # dynamic batch size +): + # PyTorch model + im = torch.zeros((batch_size, 3, *imgsz)) # BCHW image + model = attempt_load(weights, device=torch.device('cpu'), inplace=True, fuse=False) + _ = model(im) # inference + model.info() + + # TensorFlow model + im = tf.zeros((batch_size, *imgsz, 3)) # BHWC image + tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz) + _ = tf_model.predict(im) # inference + + # Keras model + im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size) + keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im)) + keras_model.summary() + + LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.') + + +def parse_opt(): + parser = argparse.ArgumentParser() + parser.add_argument('--weights', type=str, default=ROOT / 'yolo.pt', help='weights path') + parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') + parser.add_argument('--batch-size', type=int, default=1, help='batch size') + parser.add_argument('--dynamic', action='store_true', help='dynamic batch size') + opt = parser.parse_args() + opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand + print_args(vars(opt)) + return opt + + +def main(opt): + run(**vars(opt)) + + +if __name__ == "__main__": + opt = parse_opt() + main(opt) diff --git a/models/yolo.py b/models/yolo.py new file mode 100644 index 0000000000000000000000000000000000000000..61d53eee57ba92be5a2efd5f55909e2dea8101d1 --- /dev/null +++ b/models/yolo.py @@ -0,0 +1,763 @@ +import argparse +import os +import platform +import sys +from copy import deepcopy +from pathlib import Path + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLO root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH +if platform.system() != 'Windows': + ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative + +from models.common import * +from models.experimental import * +from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args +from utils.plots import feature_visualization +from utils.torch_utils import (fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device, + time_sync) +from utils.tal.anchor_generator import make_anchors, dist2bbox + +try: + import thop # for FLOPs computation +except ImportError: + thop = None + + +class Detect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = max((ch[0] // 4, self.reg_max * 4, 16)), max((ch[0], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch) + self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity() + + def forward(self, x): + shape = x[0].shape # BCHW + for i in range(self.nl): + x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1) + if self.training: + return x + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + y = torch.cat((dbox, cls.sigmoid()), 1) + return y if self.export else (y, x) + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class DDetect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = make_divisible(max((ch[0] // 4, self.reg_max * 4, 16)), 4), max((ch[0], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3, g=4), nn.Conv2d(c2, 4 * self.reg_max, 1, groups=4)) for x in ch) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch) + self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity() + + def forward(self, x): + shape = x[0].shape # BCHW + for i in range(self.nl): + x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1) + if self.training: + return x + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + y = torch.cat((dbox, cls.sigmoid()), 1) + return y if self.export else (y, x) + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class DualDetect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) // 2 # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = max((ch[0] // 4, self.reg_max * 4, 16)), max((ch[0], min((self.nc * 2, 128)))) # channels + c4, c5 = max((ch[self.nl] // 4, self.reg_max * 4, 16)), max((ch[self.nl], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch[:self.nl]) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch[:self.nl]) + self.cv4 = nn.ModuleList( + nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, 4 * self.reg_max, 1)) for x in ch[self.nl:]) + self.cv5 = nn.ModuleList( + nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nc, 1)) for x in ch[self.nl:]) + self.dfl = DFL(self.reg_max) + self.dfl2 = DFL(self.reg_max) + + def forward(self, x): + shape = x[0].shape # BCHW + d1 = [] + d2 = [] + for i in range(self.nl): + d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) + d2.append(torch.cat((self.cv4[i](x[self.nl+i]), self.cv5[i](x[self.nl+i])), 1)) + if self.training: + return [d1, d2] + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (d1.transpose(0, 1) for d1 in make_anchors(d1, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([di.view(shape[0], self.no, -1) for di in d1], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box2, cls2 = torch.cat([di.view(shape[0], self.no, -1) for di in d2], 2).split((self.reg_max * 4, self.nc), 1) + dbox2 = dist2bbox(self.dfl2(box2), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + y = [torch.cat((dbox, cls.sigmoid()), 1), torch.cat((dbox2, cls2.sigmoid()), 1)] + return y if self.export else (y, [d1, d2]) + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv4, m.cv5, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class DualDDetect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) // 2 # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = make_divisible(max((ch[0] // 4, self.reg_max * 4, 16)), 4), max((ch[0], min((self.nc * 2, 128)))) # channels + c4, c5 = make_divisible(max((ch[self.nl] // 4, self.reg_max * 4, 16)), 4), max((ch[self.nl], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3, g=4), nn.Conv2d(c2, 4 * self.reg_max, 1, groups=4)) for x in ch[:self.nl]) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch[:self.nl]) + self.cv4 = nn.ModuleList( + nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3, g=4), nn.Conv2d(c4, 4 * self.reg_max, 1, groups=4)) for x in ch[self.nl:]) + self.cv5 = nn.ModuleList( + nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nc, 1)) for x in ch[self.nl:]) + self.dfl = DFL(self.reg_max) + self.dfl2 = DFL(self.reg_max) + + def forward(self, x): + shape = x[0].shape # BCHW + d1 = [] + d2 = [] + for i in range(self.nl): + d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) + d2.append(torch.cat((self.cv4[i](x[self.nl+i]), self.cv5[i](x[self.nl+i])), 1)) + if self.training: + return [d1, d2] + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (d1.transpose(0, 1) for d1 in make_anchors(d1, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([di.view(shape[0], self.no, -1) for di in d1], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box2, cls2 = torch.cat([di.view(shape[0], self.no, -1) for di in d2], 2).split((self.reg_max * 4, self.nc), 1) + dbox2 = dist2bbox(self.dfl2(box2), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + y = [torch.cat((dbox, cls.sigmoid()), 1), torch.cat((dbox2, cls2.sigmoid()), 1)] + return y if self.export else (y, [d1, d2]) + #y = torch.cat((dbox2, cls2.sigmoid()), 1) + #return y if self.export else (y, d2) + #y1 = torch.cat((dbox, cls.sigmoid()), 1) + #y2 = torch.cat((dbox2, cls2.sigmoid()), 1) + #return [y1, y2] if self.export else [(y1, d1), (y2, d2)] + #return [y1, y2] if self.export else [(y1, y2), (d1, d2)] + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv4, m.cv5, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class TripleDetect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) // 3 # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = max((ch[0] // 4, self.reg_max * 4, 16)), max((ch[0], min((self.nc * 2, 128)))) # channels + c4, c5 = max((ch[self.nl] // 4, self.reg_max * 4, 16)), max((ch[self.nl], min((self.nc * 2, 128)))) # channels + c6, c7 = max((ch[self.nl * 2] // 4, self.reg_max * 4, 16)), max((ch[self.nl * 2], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch[:self.nl]) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch[:self.nl]) + self.cv4 = nn.ModuleList( + nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, 4 * self.reg_max, 1)) for x in ch[self.nl:self.nl*2]) + self.cv5 = nn.ModuleList( + nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nc, 1)) for x in ch[self.nl:self.nl*2]) + self.cv6 = nn.ModuleList( + nn.Sequential(Conv(x, c6, 3), Conv(c6, c6, 3), nn.Conv2d(c6, 4 * self.reg_max, 1)) for x in ch[self.nl*2:self.nl*3]) + self.cv7 = nn.ModuleList( + nn.Sequential(Conv(x, c7, 3), Conv(c7, c7, 3), nn.Conv2d(c7, self.nc, 1)) for x in ch[self.nl*2:self.nl*3]) + self.dfl = DFL(self.reg_max) + self.dfl2 = DFL(self.reg_max) + self.dfl3 = DFL(self.reg_max) + + def forward(self, x): + shape = x[0].shape # BCHW + d1 = [] + d2 = [] + d3 = [] + for i in range(self.nl): + d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) + d2.append(torch.cat((self.cv4[i](x[self.nl+i]), self.cv5[i](x[self.nl+i])), 1)) + d3.append(torch.cat((self.cv6[i](x[self.nl*2+i]), self.cv7[i](x[self.nl*2+i])), 1)) + if self.training: + return [d1, d2, d3] + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (d1.transpose(0, 1) for d1 in make_anchors(d1, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([di.view(shape[0], self.no, -1) for di in d1], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box2, cls2 = torch.cat([di.view(shape[0], self.no, -1) for di in d2], 2).split((self.reg_max * 4, self.nc), 1) + dbox2 = dist2bbox(self.dfl2(box2), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box3, cls3 = torch.cat([di.view(shape[0], self.no, -1) for di in d3], 2).split((self.reg_max * 4, self.nc), 1) + dbox3 = dist2bbox(self.dfl3(box3), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + y = [torch.cat((dbox, cls.sigmoid()), 1), torch.cat((dbox2, cls2.sigmoid()), 1), torch.cat((dbox3, cls3.sigmoid()), 1)] + return y if self.export else (y, [d1, d2, d3]) + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv4, m.cv5, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv6, m.cv7, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class TripleDDetect(nn.Module): + # YOLO Detect head for detection models + dynamic = False # force grid reconstruction + export = False # export mode + shape = None + anchors = torch.empty(0) # init + strides = torch.empty(0) # init + + def __init__(self, nc=80, ch=(), inplace=True): # detection layer + super().__init__() + self.nc = nc # number of classes + self.nl = len(ch) // 3 # number of detection layers + self.reg_max = 16 + self.no = nc + self.reg_max * 4 # number of outputs per anchor + self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.stride = torch.zeros(self.nl) # strides computed during build + + c2, c3 = make_divisible(max((ch[0] // 4, self.reg_max * 4, 16)), 4), \ + max((ch[0], min((self.nc * 2, 128)))) # channels + c4, c5 = make_divisible(max((ch[self.nl] // 4, self.reg_max * 4, 16)), 4), \ + max((ch[self.nl], min((self.nc * 2, 128)))) # channels + c6, c7 = make_divisible(max((ch[self.nl * 2] // 4, self.reg_max * 4, 16)), 4), \ + max((ch[self.nl * 2], min((self.nc * 2, 128)))) # channels + self.cv2 = nn.ModuleList( + nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3, g=4), + nn.Conv2d(c2, 4 * self.reg_max, 1, groups=4)) for x in ch[:self.nl]) + self.cv3 = nn.ModuleList( + nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch[:self.nl]) + self.cv4 = nn.ModuleList( + nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3, g=4), + nn.Conv2d(c4, 4 * self.reg_max, 1, groups=4)) for x in ch[self.nl:self.nl*2]) + self.cv5 = nn.ModuleList( + nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nc, 1)) for x in ch[self.nl:self.nl*2]) + self.cv6 = nn.ModuleList( + nn.Sequential(Conv(x, c6, 3), Conv(c6, c6, 3, g=4), + nn.Conv2d(c6, 4 * self.reg_max, 1, groups=4)) for x in ch[self.nl*2:self.nl*3]) + self.cv7 = nn.ModuleList( + nn.Sequential(Conv(x, c7, 3), Conv(c7, c7, 3), nn.Conv2d(c7, self.nc, 1)) for x in ch[self.nl*2:self.nl*3]) + self.dfl = DFL(self.reg_max) + self.dfl2 = DFL(self.reg_max) + self.dfl3 = DFL(self.reg_max) + + def forward(self, x): + shape = x[0].shape # BCHW + d1 = [] + d2 = [] + d3 = [] + for i in range(self.nl): + d1.append(torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)) + d2.append(torch.cat((self.cv4[i](x[self.nl+i]), self.cv5[i](x[self.nl+i])), 1)) + d3.append(torch.cat((self.cv6[i](x[self.nl*2+i]), self.cv7[i](x[self.nl*2+i])), 1)) + if self.training: + return [d1, d2, d3] + elif self.dynamic or self.shape != shape: + self.anchors, self.strides = (d1.transpose(0, 1) for d1 in make_anchors(d1, self.stride, 0.5)) + self.shape = shape + + box, cls = torch.cat([di.view(shape[0], self.no, -1) for di in d1], 2).split((self.reg_max * 4, self.nc), 1) + dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box2, cls2 = torch.cat([di.view(shape[0], self.no, -1) for di in d2], 2).split((self.reg_max * 4, self.nc), 1) + dbox2 = dist2bbox(self.dfl2(box2), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + box3, cls3 = torch.cat([di.view(shape[0], self.no, -1) for di in d3], 2).split((self.reg_max * 4, self.nc), 1) + dbox3 = dist2bbox(self.dfl3(box3), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides + #y = [torch.cat((dbox, cls.sigmoid()), 1), torch.cat((dbox2, cls2.sigmoid()), 1), torch.cat((dbox3, cls3.sigmoid()), 1)] + #return y if self.export else (y, [d1, d2, d3]) + y = torch.cat((dbox3, cls3.sigmoid()), 1) + return y if self.export else (y, d3) + + def bias_init(self): + # Initialize Detect() biases, WARNING: requires stride availability + m = self # self.model[-1] # Detect() module + # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1 + # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # nominal class frequency + for a, b, s in zip(m.cv2, m.cv3, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv4, m.cv5, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + for a, b, s in zip(m.cv6, m.cv7, m.stride): # from + a[-1].bias.data[:] = 1.0 # box + b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2) # cls (5 objects and 80 classes per 640 image) + + +class Segment(Detect): + # YOLO Segment head for segmentation models + def __init__(self, nc=80, nm=32, npr=256, ch=(), inplace=True): + super().__init__(nc, ch, inplace) + self.nm = nm # number of masks + self.npr = npr # number of protos + self.proto = Proto(ch[0], self.npr, self.nm) # protos + self.detect = Detect.forward + + c4 = max(ch[0] // 4, self.nm) + self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch) + + def forward(self, x): + p = self.proto(x[0]) + bs = p.shape[0] + + mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients + x = self.detect(self, x) + if self.training: + return x, mc, p + return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p)) + + +class Panoptic(Detect): + # YOLO Panoptic head for panoptic segmentation models + def __init__(self, nc=80, sem_nc=93, nm=32, npr=256, ch=(), inplace=True): + super().__init__(nc, ch, inplace) + self.sem_nc = sem_nc + self.nm = nm # number of masks + self.npr = npr # number of protos + self.proto = Proto(ch[0], self.npr, self.nm) # protos + self.uconv = UConv(ch[0], ch[0]//4, self.sem_nc+self.nc) + self.detect = Detect.forward + + c4 = max(ch[0] // 4, self.nm) + self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch) + + + def forward(self, x): + p = self.proto(x[0]) + s = self.uconv(x[0]) + bs = p.shape[0] + + mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients + x = self.detect(self, x) + if self.training: + return x, mc, p, s + return (torch.cat([x, mc], 1), p, s) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p, s)) + + +class BaseModel(nn.Module): + # YOLO base model + def forward(self, x, profile=False, visualize=False): + return self._forward_once(x, profile, visualize) # single-scale inference, train + + def _forward_once(self, x, profile=False, visualize=False): + y, dt = [], [] # outputs + for m in self.model: + if m.f != -1: # if not from previous layer + x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers + if profile: + self._profile_one_layer(m, x, dt) + x = m(x) # run + y.append(x if m.i in self.save else None) # save output + if visualize: + feature_visualization(x, m.type, m.i, save_dir=visualize) + return x + + def _profile_one_layer(self, m, x, dt): + c = m == self.model[-1] # is final layer, copy input as inplace fix + o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs + t = time_sync() + for _ in range(10): + m(x.copy() if c else x) + dt.append((time_sync() - t) * 100) + if m == self.model[0]: + LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module") + LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}') + if c: + LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total") + + def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers + LOGGER.info('Fusing layers... ') + for m in self.model.modules(): + if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'): + m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv + delattr(m, 'bn') # remove batchnorm + m.forward = m.forward_fuse # update forward + self.info() + return self + + def info(self, verbose=False, img_size=640): # print model information + model_info(self, verbose, img_size) + + def _apply(self, fn): + # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers + self = super()._apply(fn) + m = self.model[-1] # Detect() + if isinstance(m, (Detect, DualDetect, TripleDetect, DDetect, DualDDetect, TripleDDetect, Segment)): + m.stride = fn(m.stride) + m.anchors = fn(m.anchors) + m.strides = fn(m.strides) + # m.grid = list(map(fn, m.grid)) + return self + + +class DetectionModel(BaseModel): + # YOLO detection model + def __init__(self, cfg='yolo.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes + super().__init__() + if isinstance(cfg, dict): + self.yaml = cfg # model dict + else: # is *.yaml + import yaml # for torch hub + self.yaml_file = Path(cfg).name + with open(cfg, encoding='ascii', errors='ignore') as f: + self.yaml = yaml.safe_load(f) # model dict + + # Define model + ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels + if nc and nc != self.yaml['nc']: + LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") + self.yaml['nc'] = nc # override yaml value + if anchors: + LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}') + self.yaml['anchors'] = round(anchors) # override yaml value + self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist + self.names = [str(i) for i in range(self.yaml['nc'])] # default names + self.inplace = self.yaml.get('inplace', True) + + # Build strides, anchors + m = self.model[-1] # Detect() + if isinstance(m, (Detect, DDetect, Segment)): + s = 256 # 2x min stride + m.inplace = self.inplace + forward = lambda x: self.forward(x)[0] if isinstance(m, (Segment)) else self.forward(x) + m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward + # check_anchor_order(m) + # m.anchors /= m.stride.view(-1, 1, 1) + self.stride = m.stride + m.bias_init() # only run once + if isinstance(m, (DualDetect, TripleDetect, DualDDetect, TripleDDetect)): + s = 256 # 2x min stride + m.inplace = self.inplace + #forward = lambda x: self.forward(x)[0][0] if isinstance(m, (DualSegment)) else self.forward(x)[0] + forward = lambda x: self.forward(x)[0] + m.stride = torch.tensor([s / x.shape[-2] for x in forward(torch.zeros(1, ch, s, s))]) # forward + # check_anchor_order(m) + # m.anchors /= m.stride.view(-1, 1, 1) + self.stride = m.stride + m.bias_init() # only run once + + # Init weights, biases + initialize_weights(self) + self.info() + LOGGER.info('') + + def forward(self, x, augment=False, profile=False, visualize=False): + if augment: + return self._forward_augment(x) # augmented inference, None + return self._forward_once(x, profile, visualize) # single-scale inference, train + + def _forward_augment(self, x): + img_size = x.shape[-2:] # height, width + s = [1, 0.83, 0.67] # scales + f = [None, 3, None] # flips (2-ud, 3-lr) + y = [] # outputs + for si, fi in zip(s, f): + xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) + yi = self._forward_once(xi)[0] # forward + # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save + yi = self._descale_pred(yi, fi, si, img_size) + y.append(yi) + y = self._clip_augmented(y) # clip augmented tails + return torch.cat(y, 1), None # augmented inference, train + + def _descale_pred(self, p, flips, scale, img_size): + # de-scale predictions following augmented inference (inverse operation) + if self.inplace: + p[..., :4] /= scale # de-scale + if flips == 2: + p[..., 1] = img_size[0] - p[..., 1] # de-flip ud + elif flips == 3: + p[..., 0] = img_size[1] - p[..., 0] # de-flip lr + else: + x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale + if flips == 2: + y = img_size[0] - y # de-flip ud + elif flips == 3: + x = img_size[1] - x # de-flip lr + p = torch.cat((x, y, wh, p[..., 4:]), -1) + return p + + def _clip_augmented(self, y): + # Clip YOLO augmented inference tails + nl = self.model[-1].nl # number of detection layers (P3-P5) + g = sum(4 ** x for x in range(nl)) # grid points + e = 1 # exclude layer count + i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices + y[0] = y[0][:, :-i] # large + i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices + y[-1] = y[-1][:, i:] # small + return y + + +Model = DetectionModel # retain YOLO 'Model' class for backwards compatibility + + +class SegmentationModel(DetectionModel): + # YOLO segmentation model + def __init__(self, cfg='yolo-seg.yaml', ch=3, nc=None, anchors=None): + super().__init__(cfg, ch, nc, anchors) + + +class ClassificationModel(BaseModel): + # YOLO classification model + def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml, model, number of classes, cutoff index + super().__init__() + self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg) + + def _from_detection_model(self, model, nc=1000, cutoff=10): + # Create a YOLO classification model from a YOLO detection model + if isinstance(model, DetectMultiBackend): + model = model.model # unwrap DetectMultiBackend + model.model = model.model[:cutoff] # backbone + m = model.model[-1] # last layer + ch = m.conv.in_channels if hasattr(m, 'conv') else m.cv1.conv.in_channels # ch into module + c = Classify(ch, nc) # Classify() + c.i, c.f, c.type = m.i, m.f, 'models.common.Classify' # index, from, type + model.model[-1] = c # replace + self.model = model.model + self.stride = model.stride + self.save = [] + self.nc = nc + + def _from_yaml(self, cfg): + # Create a YOLO classification model from a *.yaml file + self.model = None + + +def parse_model(d, ch): # model_dict, input_channels(3) + # Parse a YOLO model.yaml dictionary + LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") + anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation') + if act: + Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU() + LOGGER.info(f"{colorstr('activation:')} {act}") # print + na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors + no = na * (nc + 5) # number of outputs = anchors * (classes + 5) + + layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out + for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args + m = eval(m) if isinstance(m, str) else m # eval strings + for j, a in enumerate(args): + with contextlib.suppress(NameError): + args[j] = eval(a) if isinstance(a, str) else a # eval strings + + n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain + if m in { + Conv, AConv, ConvTranspose, + Bottleneck, SPP, SPPF, DWConv, BottleneckCSP, nn.ConvTranspose2d, DWConvTranspose2d, SPPCSPC, ADown, + RepNCSPELAN4, SPPELAN}: + c1, c2 = ch[f], args[0] + if c2 != no: # if not output + c2 = make_divisible(c2 * gw, 8) + + args = [c1, c2, *args[1:]] + if m in {BottleneckCSP, SPPCSPC}: + args.insert(2, n) # number of repeats + n = 1 + elif m is nn.BatchNorm2d: + args = [ch[f]] + elif m is Concat: + c2 = sum(ch[x] for x in f) + elif m is Shortcut: + c2 = ch[f[0]] + elif m is ReOrg: + c2 = ch[f] * 4 + elif m is CBLinear: + c2 = args[0] + c1 = ch[f] + args = [c1, c2, *args[1:]] + elif m is CBFuse: + c2 = ch[f[-1]] + # TODO: channel, gw, gd + elif m in {Detect, DualDetect, TripleDetect, DDetect, DualDDetect, TripleDDetect, Segment}: + args.append([ch[x] for x in f]) + # if isinstance(args[1], int): # number of anchors + # args[1] = [list(range(args[1] * 2))] * len(f) + if m in {Segment}: + args[2] = make_divisible(args[2] * gw, 8) + elif m is Contract: + c2 = ch[f] * args[0] ** 2 + elif m is Expand: + c2 = ch[f] // args[0] ** 2 + else: + c2 = ch[f] + + m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module + t = str(m)[8:-2].replace('__main__.', '') # module type + np = sum(x.numel() for x in m_.parameters()) # number params + m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params + LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print + save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist + layers.append(m_) + if i == 0: + ch = [] + ch.append(c2) + return nn.Sequential(*layers), sorted(save) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--cfg', type=str, default='yolo.yaml', help='model.yaml') + parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--profile', action='store_true', help='profile model speed') + parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer') + parser.add_argument('--test', action='store_true', help='test all yolo*.yaml') + opt = parser.parse_args() + opt.cfg = check_yaml(opt.cfg) # check YAML + print_args(vars(opt)) + device = select_device(opt.device) + + # Create model + im = torch.rand(opt.batch_size, 3, 640, 640).to(device) + model = Model(opt.cfg).to(device) + model.eval() + + # Options + if opt.line_profile: # profile layer by layer + model(im, profile=True) + + elif opt.profile: # profile forward-backward + results = profile(input=im, ops=[model], n=3) + + elif opt.test: # test all models + for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'): + try: + _ = Model(cfg) + except Exception as e: + print(f'Error in {cfg}: {e}') + + else: # report fused model summary + model.fuse() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f96afcb0d49d7379d165bed5a38c5ad2a950271 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,47 @@ +# requirements +# Usage: pip install -r requirements.txt + +# Base ------------------------------------------------------------------------ +gitpython +ipython +matplotlib>=3.2.2 +numpy>=1.18.5 +opencv-python>=4.1.1 +Pillow>=7.1.2 +psutil +PyYAML>=5.3.1 +requests>=2.23.0 +scipy>=1.4.1 +thop>=0.1.1 +torch>=1.7.0 +torchvision>=0.8.1 +tqdm>=4.64.0 +# protobuf<=3.20.1 + +# Logging --------------------------------------------------------------------- +tensorboard>=2.4.1 +# clearml>=1.2.0 +# comet + +# Plotting -------------------------------------------------------------------- +pandas>=1.1.4 +seaborn>=0.11.0 + +# Export ---------------------------------------------------------------------- +# coremltools>=6.0 +# onnx>=1.9.0 +# onnx-simplifier>=0.4.1 +# nvidia-pyindex +# nvidia-tensorrt +# scikit-learn<=1.1.2 +# tensorflow>=2.4.1 +# tensorflowjs>=3.9.0 +# openvino-dev + +# Deploy ---------------------------------------------------------------------- +# tritonclient[all]~=2.24.0 + +# Extras ---------------------------------------------------------------------- +# mss +albumentations>=1.0.3 +pycocotools>=2.0 diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3d1de89ab906487f1ef16c8de246f7da5e2f408c --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1,75 @@ +import contextlib +import platform +import threading + + +def emojis(str=''): + # Return platform-dependent emoji-safe version of string + return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str + + +class TryExcept(contextlib.ContextDecorator): + # YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager + def __init__(self, msg=''): + self.msg = msg + + def __enter__(self): + pass + + def __exit__(self, exc_type, value, traceback): + if value: + print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}")) + return True + + +def threaded(func): + # Multi-threads a target function and returns thread. Usage: @threaded decorator + def wrapper(*args, **kwargs): + thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True) + thread.start() + return thread + + return wrapper + + +def join_threads(verbose=False): + # Join all daemon threads, i.e. atexit.register(lambda: join_threads()) + main_thread = threading.current_thread() + for t in threading.enumerate(): + if t is not main_thread: + if verbose: + print(f'Joining thread {t.name}') + t.join() + + +def notebook_init(verbose=True): + # Check system software and hardware + print('Checking setup...') + + import os + import shutil + + from utils.general import check_font, check_requirements, is_colab + from utils.torch_utils import select_device # imports + + check_font() + + import psutil + from IPython import display # to display images and clear console output + + if is_colab(): + shutil.rmtree('/content/sample_data', ignore_errors=True) # remove colab /sample_data directory + + # System info + if verbose: + gb = 1 << 30 # bytes to GiB (1024 ** 3) + ram = psutil.virtual_memory().total + total, used, free = shutil.disk_usage("/") + display.clear_output() + s = f'({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)' + else: + s = '' + + select_device(newline=False) + print(emojis(f'Setup complete ✅ {s}')) + return display diff --git a/utils/activations.py b/utils/activations.py new file mode 100644 index 0000000000000000000000000000000000000000..aeb00e6c7fc936ad596706c19a89ae7d2605f1c9 --- /dev/null +++ b/utils/activations.py @@ -0,0 +1,98 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class SiLU(nn.Module): + # SiLU activation https://arxiv.org/pdf/1606.08415.pdf + @staticmethod + def forward(x): + return x * torch.sigmoid(x) + + +class Hardswish(nn.Module): + # Hard-SiLU activation + @staticmethod + def forward(x): + # return x * F.hardsigmoid(x) # for TorchScript and CoreML + return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 # for TorchScript, CoreML and ONNX + + +class Mish(nn.Module): + # Mish activation https://github.com/digantamisra98/Mish + @staticmethod + def forward(x): + return x * F.softplus(x).tanh() + + +class MemoryEfficientMish(nn.Module): + # Mish activation memory-efficient + class F(torch.autograd.Function): + + @staticmethod + def forward(ctx, x): + ctx.save_for_backward(x) + return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) + + @staticmethod + def backward(ctx, grad_output): + x = ctx.saved_tensors[0] + sx = torch.sigmoid(x) + fx = F.softplus(x).tanh() + return grad_output * (fx + x * sx * (1 - fx * fx)) + + def forward(self, x): + return self.F.apply(x) + + +class FReLU(nn.Module): + # FReLU activation https://arxiv.org/abs/2007.11824 + def __init__(self, c1, k=3): # ch_in, kernel + super().__init__() + self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) + self.bn = nn.BatchNorm2d(c1) + + def forward(self, x): + return torch.max(x, self.bn(self.conv(x))) + + +class AconC(nn.Module): + r""" ACON activation (activate or not) + AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter + according to "Activate or Not: Learning Customized Activation" . + """ + + def __init__(self, c1): + super().__init__() + self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) + self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) + self.beta = nn.Parameter(torch.ones(1, c1, 1, 1)) + + def forward(self, x): + dpx = (self.p1 - self.p2) * x + return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x + + +class MetaAconC(nn.Module): + r""" ACON activation (activate or not) + MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network + according to "Activate or Not: Learning Customized Activation" . + """ + + def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r + super().__init__() + c2 = max(r, c1 // r) + self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1)) + self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1)) + self.fc1 = nn.Conv2d(c1, c2, k, s, bias=True) + self.fc2 = nn.Conv2d(c2, c1, k, s, bias=True) + # self.bn1 = nn.BatchNorm2d(c2) + # self.bn2 = nn.BatchNorm2d(c1) + + def forward(self, x): + y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True) + # batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891 + # beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable + beta = torch.sigmoid(self.fc2(self.fc1(y))) # bug patch BN layers removed + dpx = (self.p1 - self.p2) * x + return dpx * torch.sigmoid(beta * dpx) + self.p2 * x diff --git a/utils/augmentations.py b/utils/augmentations.py new file mode 100644 index 0000000000000000000000000000000000000000..ad4c07fb69ea43a113b4fcd0bed58eb8dda13f71 --- /dev/null +++ b/utils/augmentations.py @@ -0,0 +1,395 @@ +import math +import random + +import cv2 +import numpy as np +import torch +import torchvision.transforms as T +import torchvision.transforms.functional as TF + +from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box, xywhn2xyxy +from utils.metrics import bbox_ioa + +IMAGENET_MEAN = 0.485, 0.456, 0.406 # RGB mean +IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation + + +class Albumentations: + # YOLOv5 Albumentations class (optional, only used if package is installed) + def __init__(self, size=640): + self.transform = None + prefix = colorstr('albumentations: ') + try: + import albumentations as A + check_version(A.__version__, '1.0.3', hard=True) # version requirement + + T = [ + A.RandomResizedCrop(height=size, width=size, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0), + A.Blur(p=0.01), + A.MedianBlur(p=0.01), + A.ToGray(p=0.01), + A.CLAHE(p=0.01), + A.RandomBrightnessContrast(p=0.0), + A.RandomGamma(p=0.0), + A.ImageCompression(quality_lower=75, p=0.0)] # transforms + self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels'])) + + LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)) + except ImportError: # package not installed, skip + pass + except Exception as e: + LOGGER.info(f'{prefix}{e}') + + def __call__(self, im, labels, p=1.0): + if self.transform and random.random() < p: + new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed + im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])]) + return im, labels + + +def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False): + # Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std + return TF.normalize(x, mean, std, inplace=inplace) + + +def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD): + # Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean + for i in range(3): + x[:, i] = x[:, i] * std[i] + mean[i] + return x + + +def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5): + # HSV color-space augmentation + if hgain or sgain or vgain: + r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains + hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV)) + dtype = im.dtype # uint8 + + x = np.arange(0, 256, dtype=r.dtype) + lut_hue = ((x * r[0]) % 180).astype(dtype) + lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) + lut_val = np.clip(x * r[2], 0, 255).astype(dtype) + + im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) + cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed + + +def hist_equalize(im, clahe=True, bgr=False): + # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255 + yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV) + if clahe: + c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) + yuv[:, :, 0] = c.apply(yuv[:, :, 0]) + else: + yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram + return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB + + +def replicate(im, labels): + # Replicate labels + h, w = im.shape[:2] + boxes = labels[:, 1:].astype(int) + x1, y1, x2, y2 = boxes.T + s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels) + for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices + x1b, y1b, x2b, y2b = boxes[i] + bh, bw = y2b - y1b, x2b - x1b + yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y + x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] + im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b] # im4[ymin:ymax, xmin:xmax] + labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) + + return im, labels + + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + + +def random_perspective(im, + targets=(), + segments=(), + degrees=10, + translate=.1, + scale=.1, + shear=10, + perspective=0.0, + border=(0, 0)): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10)) + # targets = [cls, xyxy] + + height = im.shape[0] + border[0] * 2 # shape(h,w,c) + width = im.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -im.shape[1] / 2 # x translation (pixels) + C[1, 2] = -im.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - scale, 1 + scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels) + T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) + else: # affine + im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(im[:, :, ::-1]) # base + # ax[1].imshow(im2[:, :, ::-1]) # warped + + # Transform label coordinates + n = len(targets) + if n: + use_segments = any(x.any() for x in segments) + new = np.zeros((n, 4)) + if use_segments: # warp segments + segments = resample_segments(segments) # upsample + for i, segment in enumerate(segments): + xy = np.ones((len(segment), 3)) + xy[:, :2] = segment + xy = xy @ M.T # transform + xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine + + # clip + new[i] = segment2box(xy, width, height) + + else: # warp boxes + xy = np.ones((n * 4, 3)) + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine + + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T + + # clip + new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) + new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) + + # filter candidates + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) + targets = targets[i] + targets[:, 1:5] = new[i] + + return im, targets + + +def copy_paste(im, labels, segments, p=0.5): + # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) + n = len(segments) + if p and n: + h, w, c = im.shape # height, width, channels + im_new = np.zeros(im.shape, np.uint8) + + # calculate ioa first then select indexes randomly + boxes = np.stack([w - labels[:, 3], labels[:, 2], w - labels[:, 1], labels[:, 4]], axis=-1) # (n, 4) + ioa = bbox_ioa(boxes, labels[:, 1:5]) # intersection over area + indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, ) + n = len(indexes) + for j in random.sample(list(indexes), k=round(p * n)): + l, box, s = labels[j], boxes[j], segments[j] + labels = np.concatenate((labels, [[l[0], *box]]), 0) + segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) + cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED) + + result = cv2.flip(im, 1) # augment segments (flip left-right) + i = cv2.flip(im_new, 1).astype(bool) + im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug + + return im, labels, segments + + +def cutout(im, labels, p=0.5): + # Applies image cutout augmentation https://arxiv.org/abs/1708.04552 + if random.random() < p: + h, w = im.shape[:2] + scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction + for s in scales: + mask_h = random.randint(1, int(h * s)) # create random masks + mask_w = random.randint(1, int(w * s)) + + # box + xmin = max(0, random.randint(0, w) - mask_w // 2) + ymin = max(0, random.randint(0, h) - mask_h // 2) + xmax = min(w, xmin + mask_w) + ymax = min(h, ymin + mask_h) + + # apply random color mask + im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] + + # return unobscured labels + if len(labels) and s > 0.03: + box = np.array([[xmin, ymin, xmax, ymax]], dtype=np.float32) + ioa = bbox_ioa(box, xywhn2xyxy(labels[:, 1:5], w, h))[0] # intersection over area + labels = labels[ioa < 0.60] # remove >60% obscured labels + + return labels + + +def mixup(im, labels, im2, labels2): + # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf + r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 + im = (im * r + im2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + return im, labels + + +def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) + # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio + w1, h1 = box1[2] - box1[0], box1[3] - box1[1] + w2, h2 = box2[2] - box2[0], box2[3] - box2[1] + ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio + return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates + + +def classify_albumentations( + augment=True, + size=224, + scale=(0.08, 1.0), + ratio=(0.75, 1.0 / 0.75), # 0.75, 1.33 + hflip=0.5, + vflip=0.0, + jitter=0.4, + mean=IMAGENET_MEAN, + std=IMAGENET_STD, + auto_aug=False): + # YOLOv5 classification Albumentations (optional, only used if package is installed) + prefix = colorstr('albumentations: ') + try: + import albumentations as A + from albumentations.pytorch import ToTensorV2 + check_version(A.__version__, '1.0.3', hard=True) # version requirement + if augment: # Resize and crop + T = [A.RandomResizedCrop(height=size, width=size, scale=scale, ratio=ratio)] + if auto_aug: + # TODO: implement AugMix, AutoAug & RandAug in albumentation + LOGGER.info(f'{prefix}auto augmentations are currently not supported') + else: + if hflip > 0: + T += [A.HorizontalFlip(p=hflip)] + if vflip > 0: + T += [A.VerticalFlip(p=vflip)] + if jitter > 0: + color_jitter = (float(jitter),) * 3 # repeat value for brightness, contrast, satuaration, 0 hue + T += [A.ColorJitter(*color_jitter, 0)] + else: # Use fixed crop for eval set (reproducibility) + T = [A.SmallestMaxSize(max_size=size), A.CenterCrop(height=size, width=size)] + T += [A.Normalize(mean=mean, std=std), ToTensorV2()] # Normalize and convert to Tensor + LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)) + return A.Compose(T) + + except ImportError: # package not installed, skip + LOGGER.warning(f'{prefix}⚠️ not found, install with `pip install albumentations` (recommended)') + except Exception as e: + LOGGER.info(f'{prefix}{e}') + + +def classify_transforms(size=224): + # Transforms to apply if albumentations not installed + assert isinstance(size, int), f'ERROR: classify_transforms size {size} must be integer, not (list, tuple)' + # T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) + return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)]) + + +class LetterBox: + # YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()]) + def __init__(self, size=(640, 640), auto=False, stride=32): + super().__init__() + self.h, self.w = (size, size) if isinstance(size, int) else size + self.auto = auto # pass max size integer, automatically solve for short side using stride + self.stride = stride # used with auto + + def __call__(self, im): # im = np.array HWC + imh, imw = im.shape[:2] + r = min(self.h / imh, self.w / imw) # ratio of new/old + h, w = round(imh * r), round(imw * r) # resized image + hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else self.h, self.w + top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1) + im_out = np.full((self.h, self.w, 3), 114, dtype=im.dtype) + im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) + return im_out + + +class CenterCrop: + # YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()]) + def __init__(self, size=640): + super().__init__() + self.h, self.w = (size, size) if isinstance(size, int) else size + + def __call__(self, im): # im = np.array HWC + imh, imw = im.shape[:2] + m = min(imh, imw) # min dimension + top, left = (imh - m) // 2, (imw - m) // 2 + return cv2.resize(im[top:top + m, left:left + m], (self.w, self.h), interpolation=cv2.INTER_LINEAR) + + +class ToTensor: + # YOLOv5 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()]) + def __init__(self, half=False): + super().__init__() + self.half = half + + def __call__(self, im): # im = np.array HWC in BGR order + im = np.ascontiguousarray(im.transpose((2, 0, 1))[::-1]) # HWC to CHW -> BGR to RGB -> contiguous + im = torch.from_numpy(im) # to torch + im = im.half() if self.half else im.float() # uint8 to fp16/32 + im /= 255.0 # 0-255 to 0.0-1.0 + return im diff --git a/utils/autoanchor.py b/utils/autoanchor.py new file mode 100644 index 0000000000000000000000000000000000000000..bd81af92c93353786ebcaced0cc2bbb419378071 --- /dev/null +++ b/utils/autoanchor.py @@ -0,0 +1,164 @@ +import random + +import numpy as np +import torch +import yaml +from tqdm import tqdm + +from utils import TryExcept +from utils.general import LOGGER, TQDM_BAR_FORMAT, colorstr + +PREFIX = colorstr('AutoAnchor: ') + + +def check_anchor_order(m): + # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary + a = m.anchors.prod(-1).mean(-1).view(-1) # mean anchor area per output layer + da = a[-1] - a[0] # delta a + ds = m.stride[-1] - m.stride[0] # delta s + if da and (da.sign() != ds.sign()): # same order + LOGGER.info(f'{PREFIX}Reversing anchor order') + m.anchors[:] = m.anchors.flip(0) + + +@TryExcept(f'{PREFIX}ERROR') +def check_anchors(dataset, model, thr=4.0, imgsz=640): + # Check anchor fit to data, recompute if necessary + m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() + shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) + scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale + wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh + + def metric(k): # compute metric + r = wh[:, None] / k[None] + x = torch.min(r, 1 / r).min(2)[0] # ratio metric + best = x.max(1)[0] # best_x + aat = (x > 1 / thr).float().sum(1).mean() # anchors above threshold + bpr = (best > 1 / thr).float().mean() # best possible recall + return bpr, aat + + stride = m.stride.to(m.anchors.device).view(-1, 1, 1) # model strides + anchors = m.anchors.clone() * stride # current anchors + bpr, aat = metric(anchors.cpu().view(-1, 2)) + s = f'\n{PREFIX}{aat:.2f} anchors/target, {bpr:.3f} Best Possible Recall (BPR). ' + if bpr > 0.98: # threshold to recompute + LOGGER.info(f'{s}Current anchors are a good fit to dataset ✅') + else: + LOGGER.info(f'{s}Anchors are a poor fit to dataset ⚠️, attempting to improve...') + na = m.anchors.numel() // 2 # number of anchors + anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) + new_bpr = metric(anchors)[0] + if new_bpr > bpr: # replace anchors + anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) + m.anchors[:] = anchors.clone().view_as(m.anchors) + check_anchor_order(m) # must be in pixel-space (not grid-space) + m.anchors /= stride + s = f'{PREFIX}Done ✅ (optional: update model *.yaml to use these anchors in the future)' + else: + s = f'{PREFIX}Done ⚠️ (original anchors better than new anchors, proceeding with original anchors)' + LOGGER.info(s) + + +def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): + """ Creates kmeans-evolved anchors from training dataset + + Arguments: + dataset: path to data.yaml, or a loaded dataset + n: number of anchors + img_size: image size used for training + thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 + gen: generations to evolve anchors using genetic algorithm + verbose: print all results + + Return: + k: kmeans evolved anchors + + Usage: + from utils.autoanchor import *; _ = kmean_anchors() + """ + from scipy.cluster.vq import kmeans + + npr = np.random + thr = 1 / thr + + def metric(k, wh): # compute metrics + r = wh[:, None] / k[None] + x = torch.min(r, 1 / r).min(2)[0] # ratio metric + # x = wh_iou(wh, torch.tensor(k)) # iou metric + return x, x.max(1)[0] # x, best_x + + def anchor_fitness(k): # mutation fitness + _, best = metric(torch.tensor(k, dtype=torch.float32), wh) + return (best * (best > thr).float()).mean() # fitness + + def print_results(k, verbose=True): + k = k[np.argsort(k.prod(1))] # sort small to large + x, best = metric(k, wh0) + bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr + s = f'{PREFIX}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr\n' \ + f'{PREFIX}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' \ + f'past_thr={x[x > thr].mean():.3f}-mean: ' + for x in k: + s += '%i,%i, ' % (round(x[0]), round(x[1])) + if verbose: + LOGGER.info(s[:-2]) + return k + + if isinstance(dataset, str): # *.yaml file + with open(dataset, errors='ignore') as f: + data_dict = yaml.safe_load(f) # model dict + from utils.dataloaders import LoadImagesAndLabels + dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) + + # Get label wh + shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) + wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh + + # Filter + i = (wh0 < 3.0).any(1).sum() + if i: + LOGGER.info(f'{PREFIX}WARNING ⚠️ Extremely small objects found: {i} of {len(wh0)} labels are <3 pixels in size') + wh = wh0[(wh0 >= 2.0).any(1)].astype(np.float32) # filter > 2 pixels + # wh = wh * (npr.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 + + # Kmeans init + try: + LOGGER.info(f'{PREFIX}Running kmeans for {n} anchors on {len(wh)} points...') + assert n <= len(wh) # apply overdetermined constraint + s = wh.std(0) # sigmas for whitening + k = kmeans(wh / s, n, iter=30)[0] * s # points + assert n == len(k) # kmeans may return fewer points than requested if wh is insufficient or too similar + except Exception: + LOGGER.warning(f'{PREFIX}WARNING ⚠️ switching strategies from kmeans to random init') + k = np.sort(npr.rand(n * 2)).reshape(n, 2) * img_size # random init + wh, wh0 = (torch.tensor(x, dtype=torch.float32) for x in (wh, wh0)) + k = print_results(k, verbose=False) + + # Plot + # k, d = [None] * 20, [None] * 20 + # for i in tqdm(range(1, 21)): + # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance + # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) + # ax = ax.ravel() + # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') + # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh + # ax[0].hist(wh[wh[:, 0]<100, 0],400) + # ax[1].hist(wh[wh[:, 1]<100, 1],400) + # fig.savefig('wh.png', dpi=200) + + # Evolve + f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma + pbar = tqdm(range(gen), bar_format=TQDM_BAR_FORMAT) # progress bar + for _ in pbar: + v = np.ones(sh) + while (v == 1).all(): # mutate until a change occurs (prevent duplicates) + v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) + kg = (k.copy() * v).clip(min=2.0) + fg = anchor_fitness(kg) + if fg > f: + f, k = fg, kg.copy() + pbar.desc = f'{PREFIX}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' + if verbose: + print_results(k, verbose) + + return print_results(k).astype(np.float32) diff --git a/utils/autobatch.py b/utils/autobatch.py new file mode 100644 index 0000000000000000000000000000000000000000..a5f0d519e59f7b13ef67e8934b61a2cd30770701 --- /dev/null +++ b/utils/autobatch.py @@ -0,0 +1,67 @@ +from copy import deepcopy + +import numpy as np +import torch + +from utils.general import LOGGER, colorstr +from utils.torch_utils import profile + + +def check_train_batch_size(model, imgsz=640, amp=True): + # Check YOLOv5 training batch size + with torch.cuda.amp.autocast(amp): + return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size + + +def autobatch(model, imgsz=640, fraction=0.8, batch_size=16): + # Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory + # Usage: + # import torch + # from utils.autobatch import autobatch + # model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False) + # print(autobatch(model)) + + # Check device + prefix = colorstr('AutoBatch: ') + LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}') + device = next(model.parameters()).device # get model device + if device.type == 'cpu': + LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}') + return batch_size + if torch.backends.cudnn.benchmark: + LOGGER.info(f'{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}') + return batch_size + + # Inspect CUDA memory + gb = 1 << 30 # bytes to GiB (1024 ** 3) + d = str(device).upper() # 'CUDA:0' + properties = torch.cuda.get_device_properties(device) # device properties + t = properties.total_memory / gb # GiB total + r = torch.cuda.memory_reserved(device) / gb # GiB reserved + a = torch.cuda.memory_allocated(device) / gb # GiB allocated + f = t - (r + a) # GiB free + LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free') + + # Profile batch sizes + batch_sizes = [1, 2, 4, 8, 16] + try: + img = [torch.empty(b, 3, imgsz, imgsz) for b in batch_sizes] + results = profile(img, model, n=3, device=device) + except Exception as e: + LOGGER.warning(f'{prefix}{e}') + + # Fit a solution + y = [x[2] for x in results if x] # memory [2] + p = np.polyfit(batch_sizes[:len(y)], y, deg=1) # first degree polynomial fit + b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size) + if None in results: # some sizes failed + i = results.index(None) # first fail index + if b >= batch_sizes[i]: # y intercept above failure point + b = batch_sizes[max(i - 1, 0)] # select prior safe point + if b < 1 or b > 1024: # b outside of safe range + b = batch_size + LOGGER.warning(f'{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.') + + fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted + LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅') + return b diff --git a/utils/callbacks.py b/utils/callbacks.py new file mode 100644 index 0000000000000000000000000000000000000000..893708bb7fcc10b0e51f5b51360a2c2c86afe462 --- /dev/null +++ b/utils/callbacks.py @@ -0,0 +1,71 @@ +import threading + + +class Callbacks: + """" + Handles all registered callbacks for YOLOv5 Hooks + """ + + def __init__(self): + # Define the available callbacks + self._callbacks = { + 'on_pretrain_routine_start': [], + 'on_pretrain_routine_end': [], + 'on_train_start': [], + 'on_train_epoch_start': [], + 'on_train_batch_start': [], + 'optimizer_step': [], + 'on_before_zero_grad': [], + 'on_train_batch_end': [], + 'on_train_epoch_end': [], + 'on_val_start': [], + 'on_val_batch_start': [], + 'on_val_image_end': [], + 'on_val_batch_end': [], + 'on_val_end': [], + 'on_fit_epoch_end': [], # fit = train + val + 'on_model_save': [], + 'on_train_end': [], + 'on_params_update': [], + 'teardown': [],} + self.stop_training = False # set True to interrupt training + + def register_action(self, hook, name='', callback=None): + """ + Register a new action to a callback hook + + Args: + hook: The callback hook name to register the action to + name: The name of the action for later reference + callback: The callback to fire + """ + assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" + assert callable(callback), f"callback '{callback}' is not callable" + self._callbacks[hook].append({'name': name, 'callback': callback}) + + def get_registered_actions(self, hook=None): + """" + Returns all the registered actions by callback hook + + Args: + hook: The name of the hook to check, defaults to all + """ + return self._callbacks[hook] if hook else self._callbacks + + def run(self, hook, *args, thread=False, **kwargs): + """ + Loop through the registered actions and fire all callbacks on main thread + + Args: + hook: The name of the hook to check, defaults to all + args: Arguments to receive from YOLOv5 + thread: (boolean) Run callbacks in daemon thread + kwargs: Keyword Arguments to receive from YOLOv5 + """ + + assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" + for logger in self._callbacks[hook]: + if thread: + threading.Thread(target=logger['callback'], args=args, kwargs=kwargs, daemon=True).start() + else: + logger['callback'](*args, **kwargs) diff --git a/utils/dataloaders.py b/utils/dataloaders.py new file mode 100644 index 0000000000000000000000000000000000000000..77604299954f7f7207370f83f6c01530f76fcbc1 --- /dev/null +++ b/utils/dataloaders.py @@ -0,0 +1,1217 @@ +import contextlib +import glob +import hashlib +import json +import math +import os +import random +import shutil +import time +from itertools import repeat +from multiprocessing.pool import Pool, ThreadPool +from pathlib import Path +from threading import Thread +from urllib.parse import urlparse + +import numpy as np +import psutil +import torch +import torch.nn.functional as F +import torchvision +import yaml +from PIL import ExifTags, Image, ImageOps +from torch.utils.data import DataLoader, Dataset, dataloader, distributed +from tqdm import tqdm + +from utils.augmentations import (Albumentations, augment_hsv, classify_albumentations, classify_transforms, copy_paste, + letterbox, mixup, random_perspective) +from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, TQDM_BAR_FORMAT, check_dataset, check_requirements, + check_yaml, clean_str, cv2, is_colab, is_kaggle, segments2boxes, unzip_file, xyn2xy, + xywh2xyxy, xywhn2xyxy, xyxy2xywhn) +from utils.torch_utils import torch_distributed_zero_first + +# Parameters +HELP_URL = 'See https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' +IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp', 'pfm' # include image suffixes +VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +PIN_MEMORY = str(os.getenv('PIN_MEMORY', True)).lower() == 'true' # global pin_memory for dataloaders + +# Get orientation exif tag +for orientation in ExifTags.TAGS.keys(): + if ExifTags.TAGS[orientation] == 'Orientation': + break + + +def get_hash(paths): + # Returns a single hash value of a list of paths (files or dirs) + size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes + h = hashlib.md5(str(size).encode()) # hash sizes + h.update(''.join(paths).encode()) # hash paths + return h.hexdigest() # return hash + + +def exif_size(img): + # Returns exif-corrected PIL size + s = img.size # (width, height) + with contextlib.suppress(Exception): + rotation = dict(img._getexif().items())[orientation] + if rotation in [6, 8]: # rotation 270 or 90 + s = (s[1], s[0]) + return s + + +def exif_transpose(image): + """ + Transpose a PIL image accordingly if it has an EXIF Orientation tag. + Inplace version of https://github.com/python-pillow/Pillow/blob/master/src/PIL/ImageOps.py exif_transpose() + + :param image: The image to transpose. + :return: An image. + """ + exif = image.getexif() + orientation = exif.get(0x0112, 1) # default 1 + if orientation > 1: + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90}.get(orientation) + if method is not None: + image = image.transpose(method) + del exif[0x0112] + image.info["exif"] = exif.tobytes() + return image + + +def seed_worker(worker_id): + # Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader + worker_seed = torch.initial_seed() % 2 ** 32 + np.random.seed(worker_seed) + random.seed(worker_seed) + + +def create_dataloader(path, + imgsz, + batch_size, + stride, + single_cls=False, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + image_weights=False, + close_mosaic=False, + quad=False, + min_items=0, + prefix='', + shuffle=False): + if rect and shuffle: + LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False') + shuffle = False + with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP + dataset = LoadImagesAndLabels( + path, + imgsz, + batch_size, + augment=augment, # augmentation + hyp=hyp, # hyperparameters + rect=rect, # rectangular batches + cache_images=cache, + single_cls=single_cls, + stride=int(stride), + pad=pad, + image_weights=image_weights, + min_items=min_items, + prefix=prefix) + + batch_size = min(batch_size, len(dataset)) + nd = torch.cuda.device_count() # number of CUDA devices + nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) + #loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates + loader = DataLoader if image_weights or close_mosaic else InfiniteDataLoader + generator = torch.Generator() + generator.manual_seed(6148914691236517205 + RANK) + return loader(dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=PIN_MEMORY, + collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn, + worker_init_fn=seed_worker, + generator=generator), dataset + + +class InfiniteDataLoader(dataloader.DataLoader): + """ Dataloader that reuses workers + + Uses same syntax as vanilla DataLoader + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler)) + self.iterator = super().__iter__() + + def __len__(self): + return len(self.batch_sampler.sampler) + + def __iter__(self): + for _ in range(len(self)): + yield next(self.iterator) + + +class _RepeatSampler: + """ Sampler that repeats forever + + Args: + sampler (Sampler) + """ + + def __init__(self, sampler): + self.sampler = sampler + + def __iter__(self): + while True: + yield from iter(self.sampler) + + +class LoadScreenshots: + # YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"` + def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None): + # source = [screen_number left top width height] (pixels) + check_requirements('mss') + import mss + + source, *params = source.split() + self.screen, left, top, width, height = 0, None, None, None, None # default to full screen 0 + if len(params) == 1: + self.screen = int(params[0]) + elif len(params) == 4: + left, top, width, height = (int(x) for x in params) + elif len(params) == 5: + self.screen, left, top, width, height = (int(x) for x in params) + self.img_size = img_size + self.stride = stride + self.transforms = transforms + self.auto = auto + self.mode = 'stream' + self.frame = 0 + self.sct = mss.mss() + + # Parse monitor shape + monitor = self.sct.monitors[self.screen] + self.top = monitor["top"] if top is None else (monitor["top"] + top) + self.left = monitor["left"] if left is None else (monitor["left"] + left) + self.width = width or monitor["width"] + self.height = height or monitor["height"] + self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height} + + def __iter__(self): + return self + + def __next__(self): + # mss screen capture: get raw pixels from the screen as np array + im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR + s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: " + + if self.transforms: + im = self.transforms(im0) # transforms + else: + im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize + im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + im = np.ascontiguousarray(im) # contiguous + self.frame += 1 + return str(self.screen), im, im0, None, s # screen, img, original img, im0s, s + + +class LoadImages: + # YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4` + def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): + files = [] + for p in sorted(path) if isinstance(path, (list, tuple)) else [path]: + p = str(Path(p).resolve()) + if '*' in p: + files.extend(sorted(glob.glob(p, recursive=True))) # glob + elif os.path.isdir(p): + files.extend(sorted(glob.glob(os.path.join(p, '*.*')))) # dir + elif os.path.isfile(p): + files.append(p) # files + else: + raise FileNotFoundError(f'{p} does not exist') + + images = [x for x in files if x.split('.')[-1].lower() in IMG_FORMATS] + videos = [x for x in files if x.split('.')[-1].lower() in VID_FORMATS] + ni, nv = len(images), len(videos) + + self.img_size = img_size + self.stride = stride + self.files = images + videos + self.nf = ni + nv # number of files + self.video_flag = [False] * ni + [True] * nv + self.mode = 'image' + self.auto = auto + self.transforms = transforms # optional + self.vid_stride = vid_stride # video frame-rate stride + if any(videos): + self._new_video(videos[0]) # new video + else: + self.cap = None + assert self.nf > 0, f'No images or videos found in {p}. ' \ + f'Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}' + + def __iter__(self): + self.count = 0 + return self + + def __next__(self): + if self.count == self.nf: + raise StopIteration + path = self.files[self.count] + + if self.video_flag[self.count]: + # Read video + self.mode = 'video' + for _ in range(self.vid_stride): + self.cap.grab() + ret_val, im0 = self.cap.retrieve() + while not ret_val: + self.count += 1 + self.cap.release() + if self.count == self.nf: # last video + raise StopIteration + path = self.files[self.count] + self._new_video(path) + ret_val, im0 = self.cap.read() + + self.frame += 1 + # im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False + s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: ' + + else: + # Read image + self.count += 1 + im0 = cv2.imread(path) # BGR + assert im0 is not None, f'Image Not Found {path}' + s = f'image {self.count}/{self.nf} {path}: ' + + if self.transforms: + im = self.transforms(im0) # transforms + else: + im = letterbox(im0, self.img_size, stride=self.stride, auto=self.auto)[0] # padded resize + im = im.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + im = np.ascontiguousarray(im) # contiguous + + return path, im, im0, self.cap, s + + def _new_video(self, path): + # Create a new video capture object + self.frame = 0 + self.cap = cv2.VideoCapture(path) + self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride) + self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees + # self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493 + + def _cv2_rotate(self, im): + # Rotate a cv2 video manually + if self.orientation == 0: + return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE) + elif self.orientation == 180: + return cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE) + elif self.orientation == 90: + return cv2.rotate(im, cv2.ROTATE_180) + return im + + def __len__(self): + return self.nf # number of files + + +class LoadStreams: + # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams` + def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1): + torch.backends.cudnn.benchmark = True # faster for fixed-size inference + self.mode = 'stream' + self.img_size = img_size + self.stride = stride + self.vid_stride = vid_stride # video frame-rate stride + sources = Path(sources).read_text().rsplit() if os.path.isfile(sources) else [sources] + n = len(sources) + self.sources = [clean_str(x) for x in sources] # clean source names for later + self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n + for i, s in enumerate(sources): # index, source + # Start thread to read frames from video stream + st = f'{i + 1}/{n}: {s}... ' + if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'): # if source is YouTube video + # YouTube format i.e. 'https://www.youtube.com/watch?v=Zgi9g1ksQHc' or 'https://youtu.be/Zgi9g1ksQHc' + check_requirements(('pafy', 'youtube_dl==2020.12.2')) + import pafy + s = pafy.new(s).getbest(preftype="mp4").url # YouTube URL + s = eval(s) if s.isnumeric() else s # i.e. s = '0' local webcam + if s == 0: + assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.' + assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.' + cap = cv2.VideoCapture(s) + assert cap.isOpened(), f'{st}Failed to open {s}' + w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = cap.get(cv2.CAP_PROP_FPS) # warning: may return 0 or nan + self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback + self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30 # 30 FPS fallback + + _, self.imgs[i] = cap.read() # guarantee first frame + self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True) + LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)") + self.threads[i].start() + LOGGER.info('') # newline + + # check for common shapes + s = np.stack([letterbox(x, img_size, stride=stride, auto=auto)[0].shape for x in self.imgs]) + self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal + self.auto = auto and self.rect + self.transforms = transforms # optional + if not self.rect: + LOGGER.warning('WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.') + + def update(self, i, cap, stream): + # Read stream `i` frames in daemon thread + n, f = 0, self.frames[i] # frame number, frame array + while cap.isOpened() and n < f: + n += 1 + cap.grab() # .read() = .grab() followed by .retrieve() + if n % self.vid_stride == 0: + success, im = cap.retrieve() + if success: + self.imgs[i] = im + else: + LOGGER.warning('WARNING ⚠️ Video stream unresponsive, please check your IP camera connection.') + self.imgs[i] = np.zeros_like(self.imgs[i]) + cap.open(stream) # re-open stream if signal was lost + time.sleep(0.0) # wait time + + def __iter__(self): + self.count = -1 + return self + + def __next__(self): + self.count += 1 + if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit + cv2.destroyAllWindows() + raise StopIteration + + im0 = self.imgs.copy() + if self.transforms: + im = np.stack([self.transforms(x) for x in im0]) # transforms + else: + im = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0] for x in im0]) # resize + im = im[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW + im = np.ascontiguousarray(im) # contiguous + + return self.sources, im, im0, None, '' + + def __len__(self): + return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years + + +def img2label_paths(img_paths): + # Define label paths as a function of image paths + sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings + return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths] + + +class LoadImagesAndLabels(Dataset): + # YOLOv5 train_loader/val_loader, loads images and labels for training and validation + cache_version = 0.6 # dataset labels *.cache version + rand_interp_methods = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4] + + def __init__(self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0.0, + min_items=0, + prefix=''): + self.img_size = img_size + self.augment = augment + self.hyp = hyp + self.image_weights = image_weights + self.rect = False if image_weights else rect + self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training) + self.mosaic_border = [-img_size // 2, -img_size // 2] + self.stride = stride + self.path = path + self.albumentations = Albumentations(size=img_size) if augment else None + + try: + f = [] # image files + for p in path if isinstance(path, list) else [path]: + p = Path(p) # os-agnostic + if p.is_dir(): # dir + f += glob.glob(str(p / '**' / '*.*'), recursive=True) + # f = list(p.rglob('*.*')) # pathlib + elif p.is_file(): # file + with open(p) as t: + t = t.read().strip().splitlines() + parent = str(p.parent) + os.sep + f += [x.replace('./', parent, 1) if x.startswith('./') else x for x in t] # to global path + # f += [p.parent / x.lstrip(os.sep) for x in t] # to global path (pathlib) + else: + raise FileNotFoundError(f'{prefix}{p} does not exist') + self.im_files = sorted(x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS) + # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib + assert self.im_files, f'{prefix}No images found' + except Exception as e: + raise Exception(f'{prefix}Error loading data from {path}: {e}\n{HELP_URL}') from e + + # Check cache + self.label_files = img2label_paths(self.im_files) # labels + cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') + try: + cache, exists = np.load(cache_path, allow_pickle=True).item(), True # load dict + assert cache['version'] == self.cache_version # matches current version + assert cache['hash'] == get_hash(self.label_files + self.im_files) # identical hash + except Exception: + cache, exists = self.cache_labels(cache_path, prefix), False # run cache ops + + # Display cache + nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total + if exists and LOCAL_RANK in {-1, 0}: + d = f"Scanning {cache_path}... {nf} images, {nm + ne} backgrounds, {nc} corrupt" + tqdm(None, desc=prefix + d, total=n, initial=n, bar_format=TQDM_BAR_FORMAT) # display cache results + if cache['msgs']: + LOGGER.info('\n'.join(cache['msgs'])) # display warnings + assert nf > 0 or not augment, f'{prefix}No labels found in {cache_path}, can not start training. {HELP_URL}' + + # Read cache + [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items + labels, shapes, self.segments = zip(*cache.values()) + nl = len(np.concatenate(labels, 0)) # number of labels + assert nl > 0 or not augment, f'{prefix}All labels empty in {cache_path}, can not start training. {HELP_URL}' + self.labels = list(labels) + self.shapes = np.array(shapes) + self.im_files = list(cache.keys()) # update + self.label_files = img2label_paths(cache.keys()) # update + + # Filter images + if min_items: + include = np.array([len(x) >= min_items for x in self.labels]).nonzero()[0].astype(int) + LOGGER.info(f'{prefix}{n - len(include)}/{n} images filtered from dataset') + self.im_files = [self.im_files[i] for i in include] + self.label_files = [self.label_files[i] for i in include] + self.labels = [self.labels[i] for i in include] + self.segments = [self.segments[i] for i in include] + self.shapes = self.shapes[include] # wh + + # Create indices + n = len(self.shapes) # number of images + bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index + nb = bi[-1] + 1 # number of batches + self.batch = bi # batch index of image + self.n = n + self.indices = range(n) + + # Update labels + include_class = [] # filter labels to include only these classes (optional) + include_class_array = np.array(include_class).reshape(1, -1) + for i, (label, segment) in enumerate(zip(self.labels, self.segments)): + if include_class: + j = (label[:, 0:1] == include_class_array).any(1) + self.labels[i] = label[j] + if segment: + self.segments[i] = segment[j] + if single_cls: # single-class training, merge all classes into 0 + self.labels[i][:, 0] = 0 + + # Rectangular Training + if self.rect: + # Sort by aspect ratio + s = self.shapes # wh + ar = s[:, 1] / s[:, 0] # aspect ratio + irect = ar.argsort() + self.im_files = [self.im_files[i] for i in irect] + self.label_files = [self.label_files[i] for i in irect] + self.labels = [self.labels[i] for i in irect] + self.segments = [self.segments[i] for i in irect] + self.shapes = s[irect] # wh + ar = ar[irect] + + # Set training image shapes + shapes = [[1, 1]] * nb + for i in range(nb): + ari = ar[bi == i] + mini, maxi = ari.min(), ari.max() + if maxi < 1: + shapes[i] = [maxi, 1] + elif mini > 1: + shapes[i] = [1, 1 / mini] + + self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride + + # Cache images into RAM/disk for faster training + if cache_images == 'ram' and not self.check_cache_ram(prefix=prefix): + cache_images = False + self.ims = [None] * n + self.npy_files = [Path(f).with_suffix('.npy') for f in self.im_files] + if cache_images: + b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes + self.im_hw0, self.im_hw = [None] * n, [None] * n + fcn = self.cache_images_to_disk if cache_images == 'disk' else self.load_image + results = ThreadPool(NUM_THREADS).imap(fcn, range(n)) + pbar = tqdm(enumerate(results), total=n, bar_format=TQDM_BAR_FORMAT, disable=LOCAL_RANK > 0) + for i, x in pbar: + if cache_images == 'disk': + b += self.npy_files[i].stat().st_size + else: # 'ram' + self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i) + b += self.ims[i].nbytes + pbar.desc = f'{prefix}Caching images ({b / gb:.1f}GB {cache_images})' + pbar.close() + + def check_cache_ram(self, safety_margin=0.1, prefix=''): + # Check image caching requirements vs available memory + b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes + n = min(self.n, 30) # extrapolate from 30 random images + for _ in range(n): + im = cv2.imread(random.choice(self.im_files)) # sample image + ratio = self.img_size / max(im.shape[0], im.shape[1]) # max(h, w) # ratio + b += im.nbytes * ratio ** 2 + mem_required = b * self.n / n # GB required to cache dataset into RAM + mem = psutil.virtual_memory() + cache = mem_required * (1 + safety_margin) < mem.available # to cache or not to cache, that is the question + if not cache: + LOGGER.info(f"{prefix}{mem_required / gb:.1f}GB RAM required, " + f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, " + f"{'caching images ✅' if cache else 'not caching images ⚠️'}") + return cache + + def cache_labels(self, path=Path('./labels.cache'), prefix=''): + # Cache dataset labels, check images and read shapes + x = {} # dict + nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages + desc = f"{prefix}Scanning {path.parent / path.stem}..." + with Pool(NUM_THREADS) as pool: + pbar = tqdm(pool.imap(verify_image_label, zip(self.im_files, self.label_files, repeat(prefix))), + desc=desc, + total=len(self.im_files), + bar_format=TQDM_BAR_FORMAT) + for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: + nm += nm_f + nf += nf_f + ne += ne_f + nc += nc_f + if im_file: + x[im_file] = [lb, shape, segments] + if msg: + msgs.append(msg) + pbar.desc = f"{desc} {nf} images, {nm + ne} backgrounds, {nc} corrupt" + + pbar.close() + if msgs: + LOGGER.info('\n'.join(msgs)) + if nf == 0: + LOGGER.warning(f'{prefix}WARNING ⚠️ No labels found in {path}. {HELP_URL}') + x['hash'] = get_hash(self.label_files + self.im_files) + x['results'] = nf, nm, ne, nc, len(self.im_files) + x['msgs'] = msgs # warnings + x['version'] = self.cache_version # cache version + try: + np.save(path, x) # save cache for next time + path.with_suffix('.cache.npy').rename(path) # remove .npy suffix + LOGGER.info(f'{prefix}New cache created: {path}') + except Exception as e: + LOGGER.warning(f'{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable: {e}') # not writeable + return x + + def __len__(self): + return len(self.im_files) + + # def __iter__(self): + # self.count = -1 + # print('ran dataset iter') + # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) + # return self + + def __getitem__(self, index): + index = self.indices[index] # linear, shuffled, or image_weights + + hyp = self.hyp + mosaic = self.mosaic and random.random() < hyp['mosaic'] + if mosaic: + # Load mosaic + img, labels = self.load_mosaic(index) + shapes = None + + # MixUp augmentation + if random.random() < hyp['mixup']: + img, labels = mixup(img, labels, *self.load_mosaic(random.randint(0, self.n - 1))) + + else: + # Load image + img, (h0, w0), (h, w) = self.load_image(index) + + # Letterbox + shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + labels = self.labels[index].copy() + if labels.size: # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) + + if self.augment: + img, labels = random_perspective(img, + labels, + degrees=hyp['degrees'], + translate=hyp['translate'], + scale=hyp['scale'], + shear=hyp['shear'], + perspective=hyp['perspective']) + + nl = len(labels) # number of labels + if nl: + labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1E-3) + + if self.augment: + # Albumentations + img, labels = self.albumentations(img, labels) + nl = len(labels) # update after albumentations + + # HSV color-space + augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) + + # Flip up-down + if random.random() < hyp['flipud']: + img = np.flipud(img) + if nl: + labels[:, 2] = 1 - labels[:, 2] + + # Flip left-right + if random.random() < hyp['fliplr']: + img = np.fliplr(img) + if nl: + labels[:, 1] = 1 - labels[:, 1] + + # Cutouts + # labels = cutout(img, labels, p=0.5) + # nl = len(labels) # update after cutout + + labels_out = torch.zeros((nl, 6)) + if nl: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return torch.from_numpy(img), labels_out, self.im_files[index], shapes + + def load_image(self, i): + # Loads 1 image from dataset index 'i', returns (im, original hw, resized hw) + im, f, fn = self.ims[i], self.im_files[i], self.npy_files[i], + if im is None: # not cached in RAM + if fn.exists(): # load npy + im = np.load(fn) + else: # read image + im = cv2.imread(f) # BGR + assert im is not None, f'Image Not Found {f}' + h0, w0 = im.shape[:2] # orig hw + r = self.img_size / max(h0, w0) # ratio + if r != 1: # if sizes are not equal + interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA + im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=interp) + return im, (h0, w0), im.shape[:2] # im, hw_original, hw_resized + return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized + + def cache_images_to_disk(self, i): + # Saves an image as an *.npy file for faster loading + f = self.npy_files[i] + if not f.exists(): + np.save(f.as_posix(), cv2.imread(self.im_files[i])) + + def load_mosaic(self, index): + # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic + labels4, segments4 = [], [] + s = self.img_size + yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y + indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + random.shuffle(indices) + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = self.load_image(index) + + # place img in img4 + if i == 0: # top left + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + # Labels + labels, segments = self.labels[index].copy(), self.segments[index].copy() + if labels.size: + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padw, padh) for x in segments] + labels4.append(labels) + segments4.extend(segments) + + # Concat/clip labels + labels4 = np.concatenate(labels4, 0) + for x in (labels4[:, 1:], *segments4): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img4, labels4 = replicate(img4, labels4) # replicate + + # Augment + img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp['copy_paste']) + img4, labels4 = random_perspective(img4, + labels4, + segments4, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove + + return img4, labels4 + + def load_mosaic9(self, index): + # YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic + labels9, segments9 = [], [] + s = self.img_size + indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices + random.shuffle(indices) + hp, wp = -1, -1 # height, width previous + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = self.load_image(index) + + # place img in img9 + if i == 0: # center + img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + h0, w0 = h, w + c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates + elif i == 1: # top + c = s, s - h, s + w, s + elif i == 2: # top right + c = s + wp, s - h, s + wp + w, s + elif i == 3: # right + c = s + w0, s, s + w0 + w, s + h + elif i == 4: # bottom right + c = s + w0, s + hp, s + w0 + w, s + hp + h + elif i == 5: # bottom + c = s + w0 - w, s + h0, s + w0, s + h0 + h + elif i == 6: # bottom left + c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h + elif i == 7: # left + c = s - w, s + h0 - h, s, s + h0 + elif i == 8: # top left + c = s - w, s + h0 - hp - h, s, s + h0 - hp + + padx, pady = c[:2] + x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords + + # Labels + labels, segments = self.labels[index].copy(), self.segments[index].copy() + if labels.size: + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padx, pady) for x in segments] + labels9.append(labels) + segments9.extend(segments) + + # Image + img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] + hp, wp = h, w # height, width previous + + # Offset + yc, xc = (int(random.uniform(0, s)) for _ in self.mosaic_border) # mosaic center x, y + img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s] + + # Concat/clip labels + labels9 = np.concatenate(labels9, 0) + labels9[:, [1, 3]] -= xc + labels9[:, [2, 4]] -= yc + c = np.array([xc, yc]) # centers + segments9 = [x - c for x in segments9] + + for x in (labels9[:, 1:], *segments9): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img9, labels9 = replicate(img9, labels9) # replicate + + # Augment + img9, labels9, segments9 = copy_paste(img9, labels9, segments9, p=self.hyp['copy_paste']) + img9, labels9 = random_perspective(img9, + labels9, + segments9, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove + + return img9, labels9 + + @staticmethod + def collate_fn(batch): + im, label, path, shapes = zip(*batch) # transposed + for i, lb in enumerate(label): + lb[:, 0] = i # add target image index for build_targets() + return torch.stack(im, 0), torch.cat(label, 0), path, shapes + + @staticmethod + def collate_fn4(batch): + im, label, path, shapes = zip(*batch) # transposed + n = len(shapes) // 4 + im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n] + + ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]]) + wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]]) + s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]]) # scale + for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW + i *= 4 + if random.random() < 0.5: + im1 = F.interpolate(im[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear', + align_corners=False)[0].type(im[i].type()) + lb = label[i] + else: + im1 = torch.cat((torch.cat((im[i], im[i + 1]), 1), torch.cat((im[i + 2], im[i + 3]), 1)), 2) + lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s + im4.append(im1) + label4.append(lb) + + for i, lb in enumerate(label4): + lb[:, 0] = i # add target image index for build_targets() + + return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4 + + +# Ancillary functions -------------------------------------------------------------------------------------------------- +def flatten_recursive(path=DATASETS_DIR / 'coco128'): + # Flatten a recursive directory by bringing all files to top level + new_path = Path(f'{str(path)}_flat') + if os.path.exists(new_path): + shutil.rmtree(new_path) # delete output folder + os.makedirs(new_path) # make new output folder + for file in tqdm(glob.glob(f'{str(Path(path))}/**/*.*', recursive=True)): + shutil.copyfile(file, new_path / Path(file).name) + + +def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataloaders import *; extract_boxes() + # Convert detection dataset into classification dataset, with one directory per class + path = Path(path) # images dir + shutil.rmtree(path / 'classification') if (path / 'classification').is_dir() else None # remove existing + files = list(path.rglob('*.*')) + n = len(files) # number of files + for im_file in tqdm(files, total=n): + if im_file.suffix[1:] in IMG_FORMATS: + # image + im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB + h, w = im.shape[:2] + + # labels + lb_file = Path(img2label_paths([str(im_file)])[0]) + if Path(lb_file).exists(): + with open(lb_file) as f: + lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels + + for j, x in enumerate(lb): + c = int(x[0]) # class + f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename + if not f.parent.is_dir(): + f.parent.mkdir(parents=True) + + b = x[1:] * [w, h, w, h] # box + # b[2:] = b[2:].max() # rectangle to square + b[2:] = b[2:] * 1.2 + 3 # pad + b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int) + + b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image + b[[1, 3]] = np.clip(b[[1, 3]], 0, h) + assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}' + + +def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False): + """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files + Usage: from utils.dataloaders import *; autosplit() + Arguments + path: Path to images directory + weights: Train, val, test weights (list, tuple) + annotated_only: Only use images with an annotated txt file + """ + path = Path(path) # images dir + files = sorted(x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS) # image files only + n = len(files) # number of files + random.seed(0) # for reproducibility + indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split + + txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files + for x in txt: + if (path.parent / x).exists(): + (path.parent / x).unlink() # remove existing + + print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only) + for i, img in tqdm(zip(indices, files), total=n): + if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label + with open(path.parent / txt[i], 'a') as f: + f.write(f'./{img.relative_to(path.parent).as_posix()}' + '\n') # add image to txt file + + +def verify_image_label(args): + # Verify one image-label pair + im_file, lb_file, prefix = args + nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, '', [] # number (missing, found, empty, corrupt), message, segments + try: + # verify images + im = Image.open(im_file) + im.verify() # PIL verify + shape = exif_size(im) # image size + assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels' + assert im.format.lower() in IMG_FORMATS, f'invalid image format {im.format}' + if im.format.lower() in ('jpg', 'jpeg'): + with open(im_file, 'rb') as f: + f.seek(-2, 2) + if f.read() != b'\xff\xd9': # corrupt JPEG + ImageOps.exif_transpose(Image.open(im_file)).save(im_file, 'JPEG', subsampling=0, quality=100) + msg = f'{prefix}WARNING ⚠️ {im_file}: corrupt JPEG restored and saved' + + # verify labels + if os.path.isfile(lb_file): + nf = 1 # label found + with open(lb_file) as f: + lb = [x.split() for x in f.read().strip().splitlines() if len(x)] + if any(len(x) > 6 for x in lb): # is segment + classes = np.array([x[0] for x in lb], dtype=np.float32) + segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in lb] # (cls, xy1...) + lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) + lb = np.array(lb, dtype=np.float32) + nl = len(lb) + if nl: + assert lb.shape[1] == 5, f'labels require 5 columns, {lb.shape[1]} columns detected' + assert (lb >= 0).all(), f'negative label values {lb[lb < 0]}' + assert (lb[:, 1:] <= 1).all(), f'non-normalized or out of bounds coordinates {lb[:, 1:][lb[:, 1:] > 1]}' + _, i = np.unique(lb, axis=0, return_index=True) + if len(i) < nl: # duplicate row check + lb = lb[i] # remove duplicates + if segments: + segments = [segments[x] for x in i] + msg = f'{prefix}WARNING ⚠️ {im_file}: {nl - len(i)} duplicate labels removed' + else: + ne = 1 # label empty + lb = np.zeros((0, 5), dtype=np.float32) + else: + nm = 1 # label missing + lb = np.zeros((0, 5), dtype=np.float32) + return im_file, lb, shape, segments, nm, nf, ne, nc, msg + except Exception as e: + nc = 1 + msg = f'{prefix}WARNING ⚠️ {im_file}: ignoring corrupt image/label: {e}' + return [None, None, None, None, nm, nf, ne, nc, msg] + + +class HUBDatasetStats(): + """ Class for generating HUB dataset JSON and `-hub` dataset directory + + Arguments + path: Path to data.yaml or data.zip (with data.yaml inside data.zip) + autodownload: Attempt to download dataset if not found locally + + Usage + from utils.dataloaders import HUBDatasetStats + stats = HUBDatasetStats('coco128.yaml', autodownload=True) # usage 1 + stats = HUBDatasetStats('path/to/coco128.zip') # usage 2 + stats.get_json(save=False) + stats.process_images() + """ + + def __init__(self, path='coco128.yaml', autodownload=False): + # Initialize class + zipped, data_dir, yaml_path = self._unzip(Path(path)) + try: + with open(check_yaml(yaml_path), errors='ignore') as f: + data = yaml.safe_load(f) # data dict + if zipped: + data['path'] = data_dir + except Exception as e: + raise Exception("error/HUB/dataset_stats/yaml_load") from e + + check_dataset(data, autodownload) # download dataset if missing + self.hub_dir = Path(data['path'] + '-hub') + self.im_dir = self.hub_dir / 'images' + self.im_dir.mkdir(parents=True, exist_ok=True) # makes /images + self.stats = {'nc': data['nc'], 'names': list(data['names'].values())} # statistics dictionary + self.data = data + + @staticmethod + def _find_yaml(dir): + # Return data.yaml file + files = list(dir.glob('*.yaml')) or list(dir.rglob('*.yaml')) # try root level first and then recursive + assert files, f'No *.yaml file found in {dir}' + if len(files) > 1: + files = [f for f in files if f.stem == dir.stem] # prefer *.yaml files that match dir name + assert files, f'Multiple *.yaml files found in {dir}, only 1 *.yaml file allowed' + assert len(files) == 1, f'Multiple *.yaml files found: {files}, only 1 *.yaml file allowed in {dir}' + return files[0] + + def _unzip(self, path): + # Unzip data.zip + if not str(path).endswith('.zip'): # path is data.yaml + return False, None, path + assert Path(path).is_file(), f'Error unzipping {path}, file not found' + unzip_file(path, path=path.parent) + dir = path.with_suffix('') # dataset directory == zip name + assert dir.is_dir(), f'Error unzipping {path}, {dir} not found. path/to/abc.zip MUST unzip to path/to/abc/' + return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path + + def _hub_ops(self, f, max_dim=1920): + # HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing + f_new = self.im_dir / Path(f).name # dataset-hub image filename + try: # use PIL + im = Image.open(f) + r = max_dim / max(im.height, im.width) # ratio + if r < 1.0: # image too large + im = im.resize((int(im.width * r), int(im.height * r))) + im.save(f_new, 'JPEG', quality=50, optimize=True) # save + except Exception as e: # use OpenCV + LOGGER.info(f'WARNING ⚠️ HUB ops PIL failure {f}: {e}') + im = cv2.imread(f) + im_height, im_width = im.shape[:2] + r = max_dim / max(im_height, im_width) # ratio + if r < 1.0: # image too large + im = cv2.resize(im, (int(im_width * r), int(im_height * r)), interpolation=cv2.INTER_AREA) + cv2.imwrite(str(f_new), im) + + def get_json(self, save=False, verbose=False): + # Return dataset JSON for Ultralytics HUB + def _round(labels): + # Update labels to integer class and 6 decimal place floats + return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels] + + for split in 'train', 'val', 'test': + if self.data.get(split) is None: + self.stats[split] = None # i.e. no test set + continue + dataset = LoadImagesAndLabels(self.data[split]) # load dataset + x = np.array([ + np.bincount(label[:, 0].astype(int), minlength=self.data['nc']) + for label in tqdm(dataset.labels, total=dataset.n, desc='Statistics')]) # shape(128x80) + self.stats[split] = { + 'instance_stats': { + 'total': int(x.sum()), + 'per_class': x.sum(0).tolist()}, + 'image_stats': { + 'total': dataset.n, + 'unlabelled': int(np.all(x == 0, 1).sum()), + 'per_class': (x > 0).sum(0).tolist()}, + 'labels': [{ + str(Path(k).name): _round(v.tolist())} for k, v in zip(dataset.im_files, dataset.labels)]} + + # Save, print and return + if save: + stats_path = self.hub_dir / 'stats.json' + print(f'Saving {stats_path.resolve()}...') + with open(stats_path, 'w') as f: + json.dump(self.stats, f) # save stats.json + if verbose: + print(json.dumps(self.stats, indent=2, sort_keys=False)) + return self.stats + + def process_images(self): + # Compress images for Ultralytics HUB + for split in 'train', 'val', 'test': + if self.data.get(split) is None: + continue + dataset = LoadImagesAndLabels(self.data[split]) # load dataset + desc = f'{split} images' + for _ in tqdm(ThreadPool(NUM_THREADS).imap(self._hub_ops, dataset.im_files), total=dataset.n, desc=desc): + pass + print(f'Done. All images saved to {self.im_dir}') + return self.im_dir + + +# Classification dataloaders ------------------------------------------------------------------------------------------- +class ClassificationDataset(torchvision.datasets.ImageFolder): + """ + YOLOv5 Classification Dataset. + Arguments + root: Dataset path + transform: torchvision transforms, used by default + album_transform: Albumentations transforms, used if installed + """ + + def __init__(self, root, augment, imgsz, cache=False): + super().__init__(root=root) + self.torch_transforms = classify_transforms(imgsz) + self.album_transforms = classify_albumentations(augment, imgsz) if augment else None + self.cache_ram = cache is True or cache == 'ram' + self.cache_disk = cache == 'disk' + self.samples = [list(x) + [Path(x[0]).with_suffix('.npy'), None] for x in self.samples] # file, index, npy, im + + def __getitem__(self, i): + f, j, fn, im = self.samples[i] # filename, index, filename.with_suffix('.npy'), image + if self.cache_ram and im is None: + im = self.samples[i][3] = cv2.imread(f) + elif self.cache_disk: + if not fn.exists(): # load npy + np.save(fn.as_posix(), cv2.imread(f)) + im = np.load(fn) + else: # read image + im = cv2.imread(f) # BGR + if self.album_transforms: + sample = self.album_transforms(image=cv2.cvtColor(im, cv2.COLOR_BGR2RGB))["image"] + else: + sample = self.torch_transforms(im) + return sample, j + + +def create_classification_dataloader(path, + imgsz=224, + batch_size=16, + augment=True, + cache=False, + rank=-1, + workers=8, + shuffle=True): + # Returns Dataloader object to be used with YOLOv5 Classifier + with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP + dataset = ClassificationDataset(root=path, imgsz=imgsz, augment=augment, cache=cache) + batch_size = min(batch_size, len(dataset)) + nd = torch.cuda.device_count() + nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) + sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) + generator = torch.Generator() + generator.manual_seed(6148914691236517205 + RANK) + return InfiniteDataLoader(dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=PIN_MEMORY, + worker_init_fn=seed_worker, + generator=generator) # or DataLoader(persistent_workers=True) diff --git a/utils/downloads.py b/utils/downloads.py new file mode 100644 index 0000000000000000000000000000000000000000..a108313b3988a59948b6db609659358ea236ac4e --- /dev/null +++ b/utils/downloads.py @@ -0,0 +1,103 @@ +import logging +import os +import subprocess +import urllib +from pathlib import Path + +import requests +import torch + + +def is_url(url, check=True): + # Check if string is URL and check if URL exists + try: + url = str(url) + result = urllib.parse.urlparse(url) + assert all([result.scheme, result.netloc]) # check if is url + return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online + except (AssertionError, urllib.request.HTTPError): + return False + + +def gsutil_getsize(url=''): + # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du + s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8') + return eval(s.split(' ')[0]) if len(s) else 0 # bytes + + +def url_getsize(url='https://ultralytics.com/images/bus.jpg'): + # Return downloadable file size in bytes + response = requests.head(url, allow_redirects=True) + return int(response.headers.get('content-length', -1)) + + +def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''): + # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes + from utils.general import LOGGER + + file = Path(file) + assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}" + try: # url1 + LOGGER.info(f'Downloading {url} to {file}...') + torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO) + assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check + except Exception as e: # url2 + if file.exists(): + file.unlink() # remove partial downloads + LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...') + os.system(f"curl -# -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail + finally: + if not file.exists() or file.stat().st_size < min_bytes: # check + if file.exists(): + file.unlink() # remove partial downloads + LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}") + LOGGER.info('') + + +def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'): + # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v7.0', etc. + from utils.general import LOGGER + + def github_assets(repository, version='latest'): + # Return GitHub repo tag (i.e. 'v7.0') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...]) + if version != 'latest': + version = f'tags/{version}' # i.e. tags/v7.0 + response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api + return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets + + file = Path(str(file).strip().replace("'", '')) + if not file.exists(): + # URL specified + name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc. + if str(file).startswith(('http:/', 'https:/')): # download + url = str(file).replace(':/', '://') # Pathlib turns :// -> :/ + file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth... + if Path(file).is_file(): + LOGGER.info(f'Found {url} locally at {file}') # file already exists + else: + safe_download(file=file, url=url, min_bytes=1E5) + return file + + # GitHub assets + assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default + try: + tag, assets = github_assets(repo, release) + except Exception: + try: + tag, assets = github_assets(repo) # latest release + except Exception: + try: + tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1] + except Exception: + tag = release + + file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required) + if name in assets: + url3 = 'https://drive.google.com/drive/folders/1EFQTEUeXWSFww0luse2jB9M1QNZQGwNl' # backup gdrive mirror + safe_download( + file, + url=f'https://github.com/{repo}/releases/download/{tag}/{name}', + min_bytes=1E5, + error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag} or {url3}') + + return str(file) diff --git a/utils/general.py b/utils/general.py new file mode 100644 index 0000000000000000000000000000000000000000..efe78b29ac69975890b47e6dd47d0c13024771a4 --- /dev/null +++ b/utils/general.py @@ -0,0 +1,1135 @@ +import contextlib +import glob +import inspect +import logging +import logging.config +import math +import os +import platform +import random +import re +import signal +import sys +import time +import urllib +from copy import deepcopy +from datetime import datetime +from itertools import repeat +from multiprocessing.pool import ThreadPool +from pathlib import Path +from subprocess import check_output +from tarfile import is_tarfile +from typing import Optional +from zipfile import ZipFile, is_zipfile + +import cv2 +import IPython +import numpy as np +import pandas as pd +import pkg_resources as pkg +import torch +import torchvision +import yaml + +from utils import TryExcept, emojis +from utils.downloads import gsutil_getsize +from utils.metrics import box_iou, fitness + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[1] # YOLO root directory +RANK = int(os.getenv('RANK', -1)) + +# Settings +NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads +DATASETS_DIR = Path(os.getenv('YOLOv5_DATASETS_DIR', ROOT.parent / 'datasets')) # global datasets directory +AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode +VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode +TQDM_BAR_FORMAT = '{l_bar}{bar:10}| {n_fmt}/{total_fmt} {elapsed}' # tqdm bar format +FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf + +torch.set_printoptions(linewidth=320, precision=5, profile='long') +np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 +pd.options.display.max_columns = 10 +cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader) +os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS) # NumExpr max threads +os.environ['OMP_NUM_THREADS'] = '1' if platform.system() == 'darwin' else str(NUM_THREADS) # OpenMP (PyTorch and SciPy) + + +def is_ascii(s=''): + # Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7) + s = str(s) # convert list, tuple, None, etc. to str + return len(s.encode().decode('ascii', 'ignore')) == len(s) + + +def is_chinese(s='人工智能'): + # Is string composed of any Chinese characters? + return bool(re.search('[\u4e00-\u9fff]', str(s))) + + +def is_colab(): + # Is environment a Google Colab instance? + return 'google.colab' in sys.modules + + +def is_notebook(): + # Is environment a Jupyter notebook? Verified on Colab, Jupyterlab, Kaggle, Paperspace + ipython_type = str(type(IPython.get_ipython())) + return 'colab' in ipython_type or 'zmqshell' in ipython_type + + +def is_kaggle(): + # Is environment a Kaggle Notebook? + return os.environ.get('PWD') == '/kaggle/working' and os.environ.get('KAGGLE_URL_BASE') == 'https://www.kaggle.com' + + +def is_docker() -> bool: + """Check if the process runs inside a docker container.""" + if Path("/.dockerenv").exists(): + return True + try: # check if docker is in control groups + with open("/proc/self/cgroup") as file: + return any("docker" in line for line in file) + except OSError: + return False + + +def is_writeable(dir, test=False): + # Return True if directory has write permissions, test opening a file with write permissions if test=True + if not test: + return os.access(dir, os.W_OK) # possible issues on Windows + file = Path(dir) / 'tmp.txt' + try: + with open(file, 'w'): # open file with write permissions + pass + file.unlink() # remove file + return True + except OSError: + return False + + +LOGGING_NAME = "yolov5" + + +def set_logging(name=LOGGING_NAME, verbose=True): + # sets up logging for the given name + rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings + level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR + logging.config.dictConfig({ + "version": 1, + "disable_existing_loggers": False, + "formatters": { + name: { + "format": "%(message)s"}}, + "handlers": { + name: { + "class": "logging.StreamHandler", + "formatter": name, + "level": level,}}, + "loggers": { + name: { + "level": level, + "handlers": [name], + "propagate": False,}}}) + + +set_logging(LOGGING_NAME) # run before defining LOGGER +LOGGER = logging.getLogger(LOGGING_NAME) # define globally (used in train.py, val.py, detect.py, etc.) +if platform.system() == 'Windows': + for fn in LOGGER.info, LOGGER.warning: + setattr(LOGGER, fn.__name__, lambda x: fn(emojis(x))) # emoji safe logging + + +def user_config_dir(dir='Ultralytics', env_var='YOLOV5_CONFIG_DIR'): + # Return path of user configuration directory. Prefer environment variable if exists. Make dir if required. + env = os.getenv(env_var) + if env: + path = Path(env) # use environment variable + else: + cfg = {'Windows': 'AppData/Roaming', 'Linux': '.config', 'Darwin': 'Library/Application Support'} # 3 OS dirs + path = Path.home() / cfg.get(platform.system(), '') # OS-specific config dir + path = (path if is_writeable(path) else Path('/tmp')) / dir # GCP and AWS lambda fix, only /tmp is writeable + path.mkdir(exist_ok=True) # make if required + return path + + +CONFIG_DIR = user_config_dir() # Ultralytics settings dir + + +class Profile(contextlib.ContextDecorator): + # YOLO Profile class. Usage: @Profile() decorator or 'with Profile():' context manager + def __init__(self, t=0.0): + self.t = t + self.cuda = torch.cuda.is_available() + + def __enter__(self): + self.start = self.time() + return self + + def __exit__(self, type, value, traceback): + self.dt = self.time() - self.start # delta-time + self.t += self.dt # accumulate dt + + def time(self): + if self.cuda: + torch.cuda.synchronize() + return time.time() + + +class Timeout(contextlib.ContextDecorator): + # YOLO Timeout class. Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager + def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True): + self.seconds = int(seconds) + self.timeout_message = timeout_msg + self.suppress = bool(suppress_timeout_errors) + + def _timeout_handler(self, signum, frame): + raise TimeoutError(self.timeout_message) + + def __enter__(self): + if platform.system() != 'Windows': # not supported on Windows + signal.signal(signal.SIGALRM, self._timeout_handler) # Set handler for SIGALRM + signal.alarm(self.seconds) # start countdown for SIGALRM to be raised + + def __exit__(self, exc_type, exc_val, exc_tb): + if platform.system() != 'Windows': + signal.alarm(0) # Cancel SIGALRM if it's scheduled + if self.suppress and exc_type is TimeoutError: # Suppress TimeoutError + return True + + +class WorkingDirectory(contextlib.ContextDecorator): + # Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager + def __init__(self, new_dir): + self.dir = new_dir # new dir + self.cwd = Path.cwd().resolve() # current dir + + def __enter__(self): + os.chdir(self.dir) + + def __exit__(self, exc_type, exc_val, exc_tb): + os.chdir(self.cwd) + + +def methods(instance): + # Get class/instance methods + return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith("__")] + + +def print_args(args: Optional[dict] = None, show_file=True, show_func=False): + # Print function arguments (optional args dict) + x = inspect.currentframe().f_back # previous frame + file, _, func, _, _ = inspect.getframeinfo(x) + if args is None: # get args automatically + args, _, _, frm = inspect.getargvalues(x) + args = {k: v for k, v in frm.items() if k in args} + try: + file = Path(file).resolve().relative_to(ROOT).with_suffix('') + except ValueError: + file = Path(file).stem + s = (f'{file}: ' if show_file else '') + (f'{func}: ' if show_func else '') + LOGGER.info(colorstr(s) + ', '.join(f'{k}={v}' for k, v in args.items())) + + +def init_seeds(seed=0, deterministic=False): + # Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe + # torch.backends.cudnn.benchmark = True # AutoBatch problem https://github.com/ultralytics/yolov5/issues/9287 + if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213 + torch.use_deterministic_algorithms(True) + torch.backends.cudnn.deterministic = True + os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' + os.environ['PYTHONHASHSEED'] = str(seed) + + +def intersect_dicts(da, db, exclude=()): + # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values + return {k: v for k, v in da.items() if k in db and all(x not in k for x in exclude) and v.shape == db[k].shape} + + +def get_default_args(func): + # Get func() default arguments + signature = inspect.signature(func) + return {k: v.default for k, v in signature.parameters.items() if v.default is not inspect.Parameter.empty} + + +def get_latest_run(search_dir='.'): + # Return path to most recent 'last.pt' in /runs (i.e. to --resume from) + last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) + return max(last_list, key=os.path.getctime) if last_list else '' + + +def file_age(path=__file__): + # Return days since last file update + dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta + return dt.days # + dt.seconds / 86400 # fractional days + + +def file_date(path=__file__): + # Return human-readable file modification date, i.e. '2021-3-26' + t = datetime.fromtimestamp(Path(path).stat().st_mtime) + return f'{t.year}-{t.month}-{t.day}' + + +def file_size(path): + # Return file/dir size (MB) + mb = 1 << 20 # bytes to MiB (1024 ** 2) + path = Path(path) + if path.is_file(): + return path.stat().st_size / mb + elif path.is_dir(): + return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / mb + else: + return 0.0 + + +def check_online(): + # Check internet connectivity + import socket + + def run_once(): + # Check once + try: + socket.create_connection(("1.1.1.1", 443), 5) # check host accessibility + return True + except OSError: + return False + + return run_once() or run_once() # check twice to increase robustness to intermittent connectivity issues + + +def git_describe(path=ROOT): # path must be a directory + # Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe + try: + assert (Path(path) / '.git').is_dir() + return check_output(f'git -C {path} describe --tags --long --always', shell=True).decode()[:-1] + except Exception: + return '' + + +@TryExcept() +@WorkingDirectory(ROOT) +def check_git_status(repo='WongKinYiu/yolov9', branch='main'): + # YOLO status check, recommend 'git pull' if code is out of date + url = f'https://github.com/{repo}' + msg = f', for updates see {url}' + s = colorstr('github: ') # string + assert Path('.git').exists(), s + 'skipping check (not a git repository)' + msg + assert check_online(), s + 'skipping check (offline)' + msg + + splits = re.split(pattern=r'\s', string=check_output('git remote -v', shell=True).decode()) + matches = [repo in s for s in splits] + if any(matches): + remote = splits[matches.index(True) - 1] + else: + remote = 'ultralytics' + check_output(f'git remote add {remote} {url}', shell=True) + check_output(f'git fetch {remote}', shell=True, timeout=5) # git fetch + local_branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out + n = int(check_output(f'git rev-list {local_branch}..{remote}/{branch} --count', shell=True)) # commits behind + if n > 0: + pull = 'git pull' if remote == 'origin' else f'git pull {remote} {branch}' + s += f"⚠️ YOLO is out of date by {n} commit{'s' * (n > 1)}. Use `{pull}` or `git clone {url}` to update." + else: + s += f'up to date with {url} ✅' + LOGGER.info(s) + + +@WorkingDirectory(ROOT) +def check_git_info(path='.'): + # YOLO git info check, return {remote, branch, commit} + check_requirements('gitpython') + import git + try: + repo = git.Repo(path) + remote = repo.remotes.origin.url.replace('.git', '') # i.e. 'https://github.com/WongKinYiu/yolov9' + commit = repo.head.commit.hexsha # i.e. '3134699c73af83aac2a481435550b968d5792c0d' + try: + branch = repo.active_branch.name # i.e. 'main' + except TypeError: # not on any branch + branch = None # i.e. 'detached HEAD' state + return {'remote': remote, 'branch': branch, 'commit': commit} + except git.exc.InvalidGitRepositoryError: # path is not a git dir + return {'remote': None, 'branch': None, 'commit': None} + + +def check_python(minimum='3.7.0'): + # Check current python version vs. required python version + check_version(platform.python_version(), minimum, name='Python ', hard=True) + + +def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=False, verbose=False): + # Check version vs. required version + current, minimum = (pkg.parse_version(x) for x in (current, minimum)) + result = (current == minimum) if pinned else (current >= minimum) # bool + s = f'WARNING ⚠️ {name}{minimum} is required by YOLO, but {name}{current} is currently installed' # string + if hard: + assert result, emojis(s) # assert min requirements met + if verbose and not result: + LOGGER.warning(s) + return result + + +@TryExcept() +def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=''): + # Check installed dependencies meet YOLO requirements (pass *.txt file or list of packages or single package str) + prefix = colorstr('red', 'bold', 'requirements:') + check_python() # check python version + if isinstance(requirements, Path): # requirements.txt file + file = requirements.resolve() + assert file.exists(), f"{prefix} {file} not found, check failed." + with file.open() as f: + requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude] + elif isinstance(requirements, str): + requirements = [requirements] + + s = '' + n = 0 + for r in requirements: + try: + pkg.require(r) + except (pkg.VersionConflict, pkg.DistributionNotFound): # exception if requirements not met + s += f'"{r}" ' + n += 1 + + if s and install and AUTOINSTALL: # check environment variable + LOGGER.info(f"{prefix} YOLO requirement{'s' * (n > 1)} {s}not found, attempting AutoUpdate...") + try: + # assert check_online(), "AutoUpdate skipped (offline)" + LOGGER.info(check_output(f'pip install {s} {cmds}', shell=True).decode()) + source = file if 'file' in locals() else requirements + s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ + f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" + LOGGER.info(s) + except Exception as e: + LOGGER.warning(f'{prefix} ❌ {e}') + + +def check_img_size(imgsz, s=32, floor=0): + # Verify image size is a multiple of stride s in each dimension + if isinstance(imgsz, int): # integer i.e. img_size=640 + new_size = max(make_divisible(imgsz, int(s)), floor) + else: # list i.e. img_size=[640, 480] + imgsz = list(imgsz) # convert to list if tuple + new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz] + if new_size != imgsz: + LOGGER.warning(f'WARNING ⚠️ --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}') + return new_size + + +def check_imshow(warn=False): + # Check if environment supports image displays + try: + assert not is_notebook() + assert not is_docker() + cv2.imshow('test', np.zeros((1, 1, 3))) + cv2.waitKey(1) + cv2.destroyAllWindows() + cv2.waitKey(1) + return True + except Exception as e: + if warn: + LOGGER.warning(f'WARNING ⚠️ Environment does not support cv2.imshow() or PIL Image.show()\n{e}') + return False + + +def check_suffix(file='yolo.pt', suffix=('.pt',), msg=''): + # Check file(s) for acceptable suffix + if file and suffix: + if isinstance(suffix, str): + suffix = [suffix] + for f in file if isinstance(file, (list, tuple)) else [file]: + s = Path(f).suffix.lower() # file suffix + if len(s): + assert s in suffix, f"{msg}{f} acceptable suffix is {suffix}" + + +def check_yaml(file, suffix=('.yaml', '.yml')): + # Search/download YAML file (if necessary) and return path, checking suffix + return check_file(file, suffix) + + +def check_file(file, suffix=''): + # Search/download file (if necessary) and return path + check_suffix(file, suffix) # optional + file = str(file) # convert to str() + if os.path.isfile(file) or not file: # exists + return file + elif file.startswith(('http:/', 'https:/')): # download + url = file # warning: Pathlib turns :// -> :/ + file = Path(urllib.parse.unquote(file).split('?')[0]).name # '%2F' to '/', split https://url.com/file.txt?auth + if os.path.isfile(file): + LOGGER.info(f'Found {url} locally at {file}') # file already exists + else: + LOGGER.info(f'Downloading {url} to {file}...') + torch.hub.download_url_to_file(url, file) + assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check + return file + elif file.startswith('clearml://'): # ClearML Dataset ID + assert 'clearml' in sys.modules, "ClearML is not installed, so cannot use ClearML dataset. Try running 'pip install clearml'." + return file + else: # search + files = [] + for d in 'data', 'models', 'utils': # search directories + files.extend(glob.glob(str(ROOT / d / '**' / file), recursive=True)) # find file + assert len(files), f'File not found: {file}' # assert file was found + assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique + return files[0] # return file + + +def check_font(font=FONT, progress=False): + # Download font to CONFIG_DIR if necessary + font = Path(font) + file = CONFIG_DIR / font.name + if not font.exists() and not file.exists(): + url = f'https://ultralytics.com/assets/{font.name}' + LOGGER.info(f'Downloading {url} to {file}...') + torch.hub.download_url_to_file(url, str(file), progress=progress) + + +def check_dataset(data, autodownload=True): + # Download, check and/or unzip dataset if not found locally + + # Download (optional) + extract_dir = '' + if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)): + download(data, dir=f'{DATASETS_DIR}/{Path(data).stem}', unzip=True, delete=False, curl=False, threads=1) + data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml')) + extract_dir, autodownload = data.parent, False + + # Read yaml (optional) + if isinstance(data, (str, Path)): + data = yaml_load(data) # dictionary + + # Checks + for k in 'train', 'val', 'names': + assert k in data, emojis(f"data.yaml '{k}:' field missing ❌") + if isinstance(data['names'], (list, tuple)): # old array format + data['names'] = dict(enumerate(data['names'])) # convert to dict + assert all(isinstance(k, int) for k in data['names'].keys()), 'data.yaml names keys must be integers, i.e. 2: car' + data['nc'] = len(data['names']) + + # Resolve paths + path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.' + if not path.is_absolute(): + path = (ROOT / path).resolve() + data['path'] = path # download scripts + for k in 'train', 'val', 'test': + if data.get(k): # prepend path + if isinstance(data[k], str): + x = (path / data[k]).resolve() + if not x.exists() and data[k].startswith('../'): + x = (path / data[k][3:]).resolve() + data[k] = str(x) + else: + data[k] = [str((path / x).resolve()) for x in data[k]] + + # Parse yaml + train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download')) + if val: + val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path + if not all(x.exists() for x in val): + LOGGER.info('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]) + if not s or not autodownload: + raise Exception('Dataset not found ❌') + t = time.time() + if s.startswith('http') and s.endswith('.zip'): # URL + f = Path(s).name # filename + LOGGER.info(f'Downloading {s} to {f}...') + torch.hub.download_url_to_file(s, f) + Path(DATASETS_DIR).mkdir(parents=True, exist_ok=True) # create root + unzip_file(f, path=DATASETS_DIR) # unzip + Path(f).unlink() # remove zip + r = None # success + elif s.startswith('bash '): # bash script + LOGGER.info(f'Running {s} ...') + r = os.system(s) + else: # python script + r = exec(s, {'yaml': data}) # return None + dt = f'({round(time.time() - t, 1)}s)' + s = f"success ✅ {dt}, saved to {colorstr('bold', DATASETS_DIR)}" if r in (0, None) else f"failure {dt} ❌" + LOGGER.info(f"Dataset download {s}") + check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts + return data # dictionary + + +def check_amp(model): + # Check PyTorch Automatic Mixed Precision (AMP) functionality. Return True on correct operation + from models.common import AutoShape, DetectMultiBackend + + def amp_allclose(model, im): + # All close FP32 vs AMP results + m = AutoShape(model, verbose=False) # model + a = m(im).xywhn[0] # FP32 inference + m.amp = True + b = m(im).xywhn[0] # AMP inference + return a.shape == b.shape and torch.allclose(a, b, atol=0.1) # close to 10% absolute tolerance + + prefix = colorstr('AMP: ') + device = next(model.parameters()).device # get model device + if device.type in ('cpu', 'mps'): + return False # AMP only used on CUDA devices + f = ROOT / 'data' / 'images' / 'bus.jpg' # image to check + im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3)) + try: + #assert amp_allclose(deepcopy(model), im) or amp_allclose(DetectMultiBackend('yolo.pt', device), im) + LOGGER.info(f'{prefix}checks passed ✅') + return True + except Exception: + help_url = 'https://github.com/ultralytics/yolov5/issues/7908' + LOGGER.warning(f'{prefix}checks failed ❌, disabling Automatic Mixed Precision. See {help_url}') + return False + + +def yaml_load(file='data.yaml'): + # Single-line safe yaml loading + with open(file, errors='ignore') as f: + return yaml.safe_load(f) + + +def yaml_save(file='data.yaml', data={}): + # Single-line safe yaml saving + with open(file, 'w') as f: + yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False) + + +def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')): + # Unzip a *.zip file to path/, excluding files containing strings in exclude list + if path is None: + path = Path(file).parent # default path + with ZipFile(file) as zipObj: + for f in zipObj.namelist(): # list all archived filenames in the zip + if all(x not in f for x in exclude): + zipObj.extract(f, path=path) + + +def url2file(url): + # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt + url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/ + return Path(urllib.parse.unquote(url)).name.split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth + + +def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry=3): + # Multithreaded file download and unzip function, used in data.yaml for autodownload + def download_one(url, dir): + # Download 1 file + success = True + if os.path.isfile(url): + f = Path(url) # filename + else: # does not exist + f = dir / Path(url).name + LOGGER.info(f'Downloading {url} to {f}...') + for i in range(retry + 1): + if curl: + s = 'sS' if threads > 1 else '' # silent + r = os.system( + f'curl -# -{s}L "{url}" -o "{f}" --retry 9 -C -') # curl download with retry, continue + success = r == 0 + else: + torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download + success = f.is_file() + if success: + break + elif i < retry: + LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...') + else: + LOGGER.warning(f'❌ Failed to download {url}...') + + if unzip and success and (f.suffix == '.gz' or is_zipfile(f) or is_tarfile(f)): + LOGGER.info(f'Unzipping {f}...') + if is_zipfile(f): + unzip_file(f, dir) # unzip + elif is_tarfile(f): + os.system(f'tar xf {f} --directory {f.parent}') # unzip + elif f.suffix == '.gz': + os.system(f'tar xfz {f} --directory {f.parent}') # unzip + if delete: + f.unlink() # remove zip + + dir = Path(dir) + dir.mkdir(parents=True, exist_ok=True) # make directory + if threads > 1: + pool = ThreadPool(threads) + pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multithreaded + pool.close() + pool.join() + else: + for u in [url] if isinstance(url, (str, Path)) else url: + download_one(u, dir) + + +def make_divisible(x, divisor): + # Returns nearest x divisible by divisor + if isinstance(divisor, torch.Tensor): + divisor = int(divisor.max()) # to int + return math.ceil(x / divisor) * divisor + + +def clean_str(s): + # Cleans a string by replacing special characters with underscore _ + return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s) + + +def one_cycle(y1=0.0, y2=1.0, steps=100): + # lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf + return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 + + +def one_flat_cycle(y1=0.0, y2=1.0, steps=100): + # lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf + #return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 + return lambda x: ((1 - math.cos((x - (steps // 2)) * math.pi / (steps // 2))) / 2) * (y2 - y1) + y1 if (x > (steps // 2)) else y1 + + +def colorstr(*input): + # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world') + *args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string + colors = { + 'black': '\033[30m', # basic colors + 'red': '\033[31m', + 'green': '\033[32m', + 'yellow': '\033[33m', + 'blue': '\033[34m', + 'magenta': '\033[35m', + 'cyan': '\033[36m', + 'white': '\033[37m', + 'bright_black': '\033[90m', # bright colors + 'bright_red': '\033[91m', + 'bright_green': '\033[92m', + 'bright_yellow': '\033[93m', + 'bright_blue': '\033[94m', + 'bright_magenta': '\033[95m', + 'bright_cyan': '\033[96m', + 'bright_white': '\033[97m', + 'end': '\033[0m', # misc + 'bold': '\033[1m', + 'underline': '\033[4m'} + return ''.join(colors[x] for x in args) + f'{string}' + colors['end'] + + +def labels_to_class_weights(labels, nc=80): + # Get class weights (inverse frequency) from training labels + if labels[0] is None: # no labels loaded + return torch.Tensor() + + labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO + classes = labels[:, 0].astype(int) # labels = [class xywh] + weights = np.bincount(classes, minlength=nc) # occurrences per class + + # Prepend gridpoint count (for uCE training) + # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image + # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start + + weights[weights == 0] = 1 # replace empty bins with 1 + weights = 1 / weights # number of targets per class + weights /= weights.sum() # normalize + return torch.from_numpy(weights).float() + + +def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): + # Produces image weights based on class_weights and image contents + # Usage: index = random.choices(range(n), weights=image_weights, k=1) # weighted image sample + class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels]) + return (class_weights.reshape(1, nc) * class_counts).sum(1) + + +def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) + # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ + # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') + # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') + # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco + # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet + return [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + + +def xyxy2xywh(x): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center + y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center + y[..., 2] = x[..., 2] - x[..., 0] # width + y[..., 3] = x[..., 3] - x[..., 1] # height + return y + + +def xywh2xyxy(x): + # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x + y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y + y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x + y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y + return y + + +def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): + # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x + y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y + y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x + y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh # bottom right y + return y + + +def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0): + # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right + if clip: + clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center + y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center + y[..., 2] = (x[..., 2] - x[..., 0]) / w # width + y[..., 3] = (x[..., 3] - x[..., 1]) / h # height + return y + + +def xyn2xy(x, w=640, h=640, padw=0, padh=0): + # Convert normalized segments into pixel segments, shape (n,2) + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = w * x[..., 0] + padw # top left x + y[..., 1] = h * x[..., 1] + padh # top left y + return y + + +def segment2box(segment, width=640, height=640): + # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) + x, y = segment.T # segment xy + inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) + x, y, = x[inside], y[inside] + return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy + + +def segments2boxes(segments): + # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh) + boxes = [] + for s in segments: + x, y = s.T # segment xy + boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy + return xyxy2xywh(np.array(boxes)) # cls, xywh + + +def resample_segments(segments, n=1000): + # Up-sample an (n,2) segment + for i, s in enumerate(segments): + s = np.concatenate((s, s[0:1, :]), axis=0) + x = np.linspace(0, len(s) - 1, n) + xp = np.arange(len(s)) + segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy + return segments + + +def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None): + # Rescale boxes (xyxy) from img1_shape to img0_shape + if ratio_pad is None: # calculate from img0_shape + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + boxes[:, [0, 2]] -= pad[0] # x padding + boxes[:, [1, 3]] -= pad[1] # y padding + boxes[:, :4] /= gain + clip_boxes(boxes, img0_shape) + return boxes + + +def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False): + # Rescale coords (xyxy) from img1_shape to img0_shape + if ratio_pad is None: # calculate from img0_shape + gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + else: + gain = ratio_pad[0][0] + pad = ratio_pad[1] + + segments[:, 0] -= pad[0] # x padding + segments[:, 1] -= pad[1] # y padding + segments /= gain + clip_segments(segments, img0_shape) + if normalize: + segments[:, 0] /= img0_shape[1] # width + segments[:, 1] /= img0_shape[0] # height + return segments + + +def clip_boxes(boxes, shape): + # Clip boxes (xyxy) to image shape (height, width) + if isinstance(boxes, torch.Tensor): # faster individually + boxes[:, 0].clamp_(0, shape[1]) # x1 + boxes[:, 1].clamp_(0, shape[0]) # y1 + boxes[:, 2].clamp_(0, shape[1]) # x2 + boxes[:, 3].clamp_(0, shape[0]) # y2 + else: # np.array (faster grouped) + boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2 + boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2 + + +def clip_segments(segments, shape): + # Clip segments (xy1,xy2,...) to image shape (height, width) + if isinstance(segments, torch.Tensor): # faster individually + segments[:, 0].clamp_(0, shape[1]) # x + segments[:, 1].clamp_(0, shape[0]) # y + else: # np.array (faster grouped) + segments[:, 0] = segments[:, 0].clip(0, shape[1]) # x + segments[:, 1] = segments[:, 1].clip(0, shape[0]) # y + + +def non_max_suppression( + prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, + labels=(), + max_det=300, + nm=0, # number of masks +): + """Non-Maximum Suppression (NMS) on inference results to reject overlapping detections + + Returns: + list of detections, on (n,6) tensor per image [xyxy, conf, cls] + """ + + if isinstance(prediction, (list, tuple)): # YOLO model in validation model, output = (inference_out, loss_out) + prediction = prediction[0] # select only inference output + + device = prediction.device + mps = 'mps' in device.type # Apple MPS + if mps: # MPS not fully supported yet, convert tensors to CPU before NMS + prediction = prediction.cpu() + bs = prediction.shape[0] # batch size + nc = prediction.shape[1] - nm - 4 # number of classes + mi = 4 + nc # mask start index + xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates + + # Checks + assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' + assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' + + # Settings + # min_wh = 2 # (pixels) minimum box width and height + max_wh = 7680 # (pixels) maximum box width and height + max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() + time_limit = 2.5 + 0.05 * bs # seconds to quit after + redundant = True # require redundant detections + multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) + merge = False # use merge-NMS + + t = time.time() + output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs + for xi, x in enumerate(prediction): # image index, image inference + # Apply constraints + # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height + x = x.T[xc[xi]] # confidence + + # Cat apriori labels if autolabelling + if labels and len(labels[xi]): + lb = labels[xi] + v = torch.zeros((len(lb), nc + nm + 5), device=x.device) + v[:, :4] = lb[:, 1:5] # box + v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls + x = torch.cat((x, v), 0) + + # If none remain process next image + if not x.shape[0]: + continue + + # Detections matrix nx6 (xyxy, conf, cls) + box, cls, mask = x.split((4, nc, nm), 1) + box = xywh2xyxy(box) # center_x, center_y, width, height) to (x1, y1, x2, y2) + if multi_label: + i, j = (cls > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1) + else: # best class only + conf, j = cls.max(1, keepdim=True) + x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres] + + # Filter by class + if classes is not None: + x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] + + # Apply finite constraint + # if not torch.isfinite(x).all(): + # x = x[torch.isfinite(x).all(1)] + + # Check shape + n = x.shape[0] # number of boxes + if not n: # no boxes + continue + elif n > max_nms: # excess boxes + x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence + else: + x = x[x[:, 4].argsort(descending=True)] # sort by confidence + + # Batched NMS + c = x[:, 5:6] * (0 if agnostic else max_wh) # classes + boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS + if i.shape[0] > max_det: # limit detections + i = i[:max_det] + if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) + # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) + iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix + weights = iou * scores[None] # box weights + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes + if redundant: + i = i[iou.sum(1) > 1] # require redundancy + + output[xi] = x[i] + if mps: + output[xi] = output[xi].to(device) + if (time.time() - t) > time_limit: + LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded') + break # time limit exceeded + + return output + + +def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer() + # Strip optimizer from 'f' to finalize training, optionally save as 's' + x = torch.load(f, map_location=torch.device('cpu')) + if x.get('ema'): + x['model'] = x['ema'] # replace model with ema + for k in 'optimizer', 'best_fitness', 'ema', 'updates': # keys + x[k] = None + x['epoch'] = -1 + x['model'].half() # to FP16 + for p in x['model'].parameters(): + p.requires_grad = False + torch.save(x, s or f) + mb = os.path.getsize(s or f) / 1E6 # filesize + LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB") + + +def print_mutation(keys, results, hyp, save_dir, bucket, prefix=colorstr('evolve: ')): + evolve_csv = save_dir / 'evolve.csv' + evolve_yaml = save_dir / 'hyp_evolve.yaml' + keys = tuple(keys) + tuple(hyp.keys()) # [results + hyps] + keys = tuple(x.strip() for x in keys) + vals = results + tuple(hyp.values()) + n = len(keys) + + # Download (optional) + if bucket: + url = f'gs://{bucket}/evolve.csv' + if gsutil_getsize(url) > (evolve_csv.stat().st_size if evolve_csv.exists() else 0): + os.system(f'gsutil cp {url} {save_dir}') # download evolve.csv if larger than local + + # Log to evolve.csv + s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n') # add header + with open(evolve_csv, 'a') as f: + f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n') + + # Save yaml + with open(evolve_yaml, 'w') as f: + data = pd.read_csv(evolve_csv) + data = data.rename(columns=lambda x: x.strip()) # strip keys + i = np.argmax(fitness(data.values[:, :4])) # + generations = len(data) + f.write('# YOLO Hyperparameter Evolution Results\n' + f'# Best generation: {i}\n' + + f'# Last generation: {generations - 1}\n' + '# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) + + '\n' + '# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n') + yaml.safe_dump(data.loc[i][7:].to_dict(), f, sort_keys=False) + + # Print to screen + LOGGER.info(prefix + f'{generations} generations finished, current result:\n' + prefix + + ', '.join(f'{x.strip():>20s}' for x in keys) + '\n' + prefix + ', '.join(f'{x:20.5g}' + for x in vals) + '\n\n') + + if bucket: + os.system(f'gsutil cp {evolve_csv} {evolve_yaml} gs://{bucket}') # upload + + +def apply_classifier(x, model, img, im0): + # Apply a second stage classifier to YOLO outputs + # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval() + im0 = [im0] if isinstance(im0, np.ndarray) else im0 + for i, d in enumerate(x): # per image + if d is not None and len(d): + d = d.clone() + + # Reshape and pad cutouts + b = xyxy2xywh(d[:, :4]) # boxes + b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square + b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad + d[:, :4] = xywh2xyxy(b).long() + + # Rescale boxes from img_size to im0 size + scale_boxes(img.shape[2:], d[:, :4], im0[i].shape) + + # Classes + pred_cls1 = d[:, 5].long() + ims = [] + for a in d: + cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])] + im = cv2.resize(cutout, (224, 224)) # BGR + + im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 + im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + ims.append(im) + + pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction + x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections + + return x + + +def increment_path(path, exist_ok=False, sep='', mkdir=False): + # Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc. + path = Path(path) # os-agnostic + if path.exists() and not exist_ok: + path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '') + + # Method 1 + for n in range(2, 9999): + p = f'{path}{sep}{n}{suffix}' # increment path + if not os.path.exists(p): # + break + path = Path(p) + + # Method 2 (deprecated) + # dirs = glob.glob(f"{path}{sep}*") # similar paths + # matches = [re.search(rf"{path.stem}{sep}(\d+)", d) for d in dirs] + # i = [int(m.groups()[0]) for m in matches if m] # indices + # n = max(i) + 1 if i else 2 # increment number + # path = Path(f"{path}{sep}{n}{suffix}") # increment path + + if mkdir: + path.mkdir(parents=True, exist_ok=True) # make directory + + return path + + +# OpenCV Chinese-friendly functions ------------------------------------------------------------------------------------ +imshow_ = cv2.imshow # copy to avoid recursion errors + + +def imread(path, flags=cv2.IMREAD_COLOR): + return cv2.imdecode(np.fromfile(path, np.uint8), flags) + + +def imwrite(path, im): + try: + cv2.imencode(Path(path).suffix, im)[1].tofile(path) + return True + except Exception: + return False + + +def imshow(path, im): + imshow_(path.encode('unicode_escape').decode(), im) + + +cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow # redefine + +# Variables ------------------------------------------------------------------------------------------------------------ diff --git a/utils/lion.py b/utils/lion.py new file mode 100644 index 0000000000000000000000000000000000000000..63651cff24e3d00e7e15a2cff2a81d1da46b8c5e --- /dev/null +++ b/utils/lion.py @@ -0,0 +1,67 @@ +"""PyTorch implementation of the Lion optimizer.""" +import torch +from torch.optim.optimizer import Optimizer + + +class Lion(Optimizer): + r"""Implements Lion algorithm.""" + + def __init__(self, params, lr=1e-4, betas=(0.9, 0.99), weight_decay=0.0): + """Initialize the hyperparameters. + Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): learning rate (default: 1e-4) + betas (Tuple[float, float], optional): coefficients used for computing + running averages of gradient and its square (default: (0.9, 0.99)) + weight_decay (float, optional): weight decay coefficient (default: 0) + """ + + if not 0.0 <= lr: + raise ValueError('Invalid learning rate: {}'.format(lr)) + if not 0.0 <= betas[0] < 1.0: + raise ValueError('Invalid beta parameter at index 0: {}'.format(betas[0])) + if not 0.0 <= betas[1] < 1.0: + raise ValueError('Invalid beta parameter at index 1: {}'.format(betas[1])) + defaults = dict(lr=lr, betas=betas, weight_decay=weight_decay) + super().__init__(params, defaults) + + @torch.no_grad() + def step(self, closure=None): + """Performs a single optimization step. + Args: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. + Returns: + the loss. + """ + loss = None + if closure is not None: + with torch.enable_grad(): + loss = closure() + + for group in self.param_groups: + for p in group['params']: + if p.grad is None: + continue + + # Perform stepweight decay + p.data.mul_(1 - group['lr'] * group['weight_decay']) + + grad = p.grad + state = self.state[p] + # State initialization + if len(state) == 0: + # Exponential moving average of gradient values + state['exp_avg'] = torch.zeros_like(p) + + exp_avg = state['exp_avg'] + beta1, beta2 = group['betas'] + + # Weight update + update = exp_avg * beta1 + grad * (1 - beta1) + p.add_(torch.sign(update), alpha=-group['lr']) + # Decay the momentum running average coefficient + exp_avg.mul_(beta2).add_(grad, alpha=1 - beta2) + + return loss \ No newline at end of file diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8fc8377ab60987a0903de62ef0b20a946289ea8f --- /dev/null +++ b/utils/loggers/__init__.py @@ -0,0 +1,399 @@ +import os +import warnings +from pathlib import Path + +import pkg_resources as pkg +import torch +from torch.utils.tensorboard import SummaryWriter + +from utils.general import LOGGER, colorstr, cv2 +from utils.loggers.clearml.clearml_utils import ClearmlLogger +from utils.loggers.wandb.wandb_utils import WandbLogger +from utils.plots import plot_images, plot_labels, plot_results +from utils.torch_utils import de_parallel + +LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet') # *.csv, TensorBoard, Weights & Biases, ClearML +RANK = int(os.getenv('RANK', -1)) + +try: + import wandb + + assert hasattr(wandb, '__version__') # verify package import not local dir + if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.2') and RANK in {0, -1}: + try: + wandb_login_success = wandb.login(timeout=30) + except wandb.errors.UsageError: # known non-TTY terminal issue + wandb_login_success = False + if not wandb_login_success: + wandb = None +except (ImportError, AssertionError): + wandb = None + +try: + import clearml + + assert hasattr(clearml, '__version__') # verify package import not local dir +except (ImportError, AssertionError): + clearml = None + +try: + if RANK not in [0, -1]: + comet_ml = None + else: + import comet_ml + + assert hasattr(comet_ml, '__version__') # verify package import not local dir + from utils.loggers.comet import CometLogger + +except (ModuleNotFoundError, ImportError, AssertionError): + comet_ml = None + + +class Loggers(): + # YOLO Loggers class + def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS): + self.save_dir = save_dir + self.weights = weights + self.opt = opt + self.hyp = hyp + self.plots = not opt.noplots # plot results + self.logger = logger # for printing results to console + self.include = include + self.keys = [ + 'train/box_loss', + 'train/cls_loss', + 'train/dfl_loss', # train loss + 'metrics/precision', + 'metrics/recall', + 'metrics/mAP_0.5', + 'metrics/mAP_0.5:0.95', # metrics + 'val/box_loss', + 'val/cls_loss', + 'val/dfl_loss', # val loss + 'x/lr0', + 'x/lr1', + 'x/lr2'] # params + self.best_keys = ['best/epoch', 'best/precision', 'best/recall', 'best/mAP_0.5', 'best/mAP_0.5:0.95'] + for k in LOGGERS: + setattr(self, k, None) # init empty logger dictionary + self.csv = True # always log to csv + + # Messages + # if not wandb: + # prefix = colorstr('Weights & Biases: ') + # s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLO 🚀 runs in Weights & Biases" + # self.logger.info(s) + if not clearml: + prefix = colorstr('ClearML: ') + s = f"{prefix}run 'pip install clearml' to automatically track, visualize and remotely train YOLO 🚀 in ClearML" + self.logger.info(s) + if not comet_ml: + prefix = colorstr('Comet: ') + s = f"{prefix}run 'pip install comet_ml' to automatically track and visualize YOLO 🚀 runs in Comet" + self.logger.info(s) + # TensorBoard + s = self.save_dir + if 'tb' in self.include and not self.opt.evolve: + prefix = colorstr('TensorBoard: ') + self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/") + self.tb = SummaryWriter(str(s)) + + # W&B + if wandb and 'wandb' in self.include: + wandb_artifact_resume = isinstance(self.opt.resume, str) and self.opt.resume.startswith('wandb-artifact://') + run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume and not wandb_artifact_resume else None + self.opt.hyp = self.hyp # add hyperparameters + self.wandb = WandbLogger(self.opt, run_id) + # temp warn. because nested artifacts not supported after 0.12.10 + # if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.11'): + # s = "YOLO temporarily requires wandb version 0.12.10 or below. Some features may not work as expected." + # self.logger.warning(s) + else: + self.wandb = None + + # ClearML + if clearml and 'clearml' in self.include: + self.clearml = ClearmlLogger(self.opt, self.hyp) + else: + self.clearml = None + + # Comet + if comet_ml and 'comet' in self.include: + if isinstance(self.opt.resume, str) and self.opt.resume.startswith("comet://"): + run_id = self.opt.resume.split("/")[-1] + self.comet_logger = CometLogger(self.opt, self.hyp, run_id=run_id) + + else: + self.comet_logger = CometLogger(self.opt, self.hyp) + + else: + self.comet_logger = None + + @property + def remote_dataset(self): + # Get data_dict if custom dataset artifact link is provided + data_dict = None + if self.clearml: + data_dict = self.clearml.data_dict + if self.wandb: + data_dict = self.wandb.data_dict + if self.comet_logger: + data_dict = self.comet_logger.data_dict + + return data_dict + + def on_train_start(self): + if self.comet_logger: + self.comet_logger.on_train_start() + + def on_pretrain_routine_start(self): + if self.comet_logger: + self.comet_logger.on_pretrain_routine_start() + + def on_pretrain_routine_end(self, labels, names): + # Callback runs on pre-train routine end + if self.plots: + plot_labels(labels, names, self.save_dir) + paths = self.save_dir.glob('*labels*.jpg') # training labels + if self.wandb: + self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]}) + # if self.clearml: + # pass # ClearML saves these images automatically using hooks + if self.comet_logger: + self.comet_logger.on_pretrain_routine_end(paths) + + def on_train_batch_end(self, model, ni, imgs, targets, paths, vals): + log_dict = dict(zip(self.keys[0:3], vals)) + # Callback runs on train batch end + # ni: number integrated batches (since train start) + if self.plots: + if ni < 3: + f = self.save_dir / f'train_batch{ni}.jpg' # filename + plot_images(imgs, targets, paths, f) + if ni == 0 and self.tb and not self.opt.sync_bn: + log_tensorboard_graph(self.tb, model, imgsz=(self.opt.imgsz, self.opt.imgsz)) + if ni == 10 and (self.wandb or self.clearml): + files = sorted(self.save_dir.glob('train*.jpg')) + if self.wandb: + self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]}) + if self.clearml: + self.clearml.log_debug_samples(files, title='Mosaics') + + if self.comet_logger: + self.comet_logger.on_train_batch_end(log_dict, step=ni) + + def on_train_epoch_end(self, epoch): + # Callback runs on train epoch end + if self.wandb: + self.wandb.current_epoch = epoch + 1 + + if self.comet_logger: + self.comet_logger.on_train_epoch_end(epoch) + + def on_val_start(self): + if self.comet_logger: + self.comet_logger.on_val_start() + + def on_val_image_end(self, pred, predn, path, names, im): + # Callback runs on val image end + if self.wandb: + self.wandb.val_one_image(pred, predn, path, names, im) + if self.clearml: + self.clearml.log_image_with_boxes(path, pred, names, im) + + def on_val_batch_end(self, batch_i, im, targets, paths, shapes, out): + if self.comet_logger: + self.comet_logger.on_val_batch_end(batch_i, im, targets, paths, shapes, out) + + def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix): + # Callback runs on val end + if self.wandb or self.clearml: + files = sorted(self.save_dir.glob('val*.jpg')) + if self.wandb: + self.wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]}) + if self.clearml: + self.clearml.log_debug_samples(files, title='Validation') + + if self.comet_logger: + self.comet_logger.on_val_end(nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix) + + def on_fit_epoch_end(self, vals, epoch, best_fitness, fi): + # Callback runs at the end of each fit (train+val) epoch + x = dict(zip(self.keys, vals)) + if self.csv: + file = self.save_dir / 'results.csv' + n = len(x) + 1 # number of cols + s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header + with open(file, 'a') as f: + f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') + + if self.tb: + for k, v in x.items(): + self.tb.add_scalar(k, v, epoch) + elif self.clearml: # log to ClearML if TensorBoard not used + for k, v in x.items(): + title, series = k.split('/') + self.clearml.task.get_logger().report_scalar(title, series, v, epoch) + + if self.wandb: + if best_fitness == fi: + best_results = [epoch] + vals[3:7] + for i, name in enumerate(self.best_keys): + self.wandb.wandb_run.summary[name] = best_results[i] # log best results in the summary + self.wandb.log(x) + self.wandb.end_epoch(best_result=best_fitness == fi) + + if self.clearml: + self.clearml.current_epoch_logged_images = set() # reset epoch image limit + self.clearml.current_epoch += 1 + + if self.comet_logger: + self.comet_logger.on_fit_epoch_end(x, epoch=epoch) + + def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): + # Callback runs on model save event + if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1: + if self.wandb: + self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) + if self.clearml: + self.clearml.task.update_output_model(model_path=str(last), + model_name='Latest Model', + auto_delete_file=False) + + if self.comet_logger: + self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi) + + def on_train_end(self, last, best, epoch, results): + # Callback runs on training end, i.e. saving best model + if self.plots: + plot_results(file=self.save_dir / 'results.csv') # save results.png + files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))] + files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter + self.logger.info(f"Results saved to {colorstr('bold', self.save_dir)}") + + if self.tb and not self.clearml: # These images are already captured by ClearML by now, we don't want doubles + for f in files: + self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') + + if self.wandb: + self.wandb.log(dict(zip(self.keys[3:10], results))) + self.wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]}) + # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model + if not self.opt.evolve: + wandb.log_artifact(str(best if best.exists() else last), + type='model', + name=f'run_{self.wandb.wandb_run.id}_model', + aliases=['latest', 'best', 'stripped']) + self.wandb.finish_run() + + if self.clearml and not self.opt.evolve: + self.clearml.task.update_output_model(model_path=str(best if best.exists() else last), + name='Best Model', + auto_delete_file=False) + + if self.comet_logger: + final_results = dict(zip(self.keys[3:10], results)) + self.comet_logger.on_train_end(files, self.save_dir, last, best, epoch, final_results) + + def on_params_update(self, params: dict): + # Update hyperparams or configs of the experiment + if self.wandb: + self.wandb.wandb_run.config.update(params, allow_val_change=True) + if self.comet_logger: + self.comet_logger.on_params_update(params) + + +class GenericLogger: + """ + YOLO General purpose logger for non-task specific logging + Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...) + Arguments + opt: Run arguments + console_logger: Console logger + include: loggers to include + """ + + def __init__(self, opt, console_logger, include=('tb', 'wandb')): + # init default loggers + self.save_dir = Path(opt.save_dir) + self.include = include + self.console_logger = console_logger + self.csv = self.save_dir / 'results.csv' # CSV logger + if 'tb' in self.include: + prefix = colorstr('TensorBoard: ') + self.console_logger.info( + f"{prefix}Start with 'tensorboard --logdir {self.save_dir.parent}', view at http://localhost:6006/") + self.tb = SummaryWriter(str(self.save_dir)) + + if wandb and 'wandb' in self.include: + self.wandb = wandb.init(project=web_project_name(str(opt.project)), + name=None if opt.name == "exp" else opt.name, + config=opt) + else: + self.wandb = None + + def log_metrics(self, metrics, epoch): + # Log metrics dictionary to all loggers + if self.csv: + keys, vals = list(metrics.keys()), list(metrics.values()) + n = len(metrics) + 1 # number of cols + s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n') # header + with open(self.csv, 'a') as f: + f.write(s + ('%23.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n') + + if self.tb: + for k, v in metrics.items(): + self.tb.add_scalar(k, v, epoch) + + if self.wandb: + self.wandb.log(metrics, step=epoch) + + def log_images(self, files, name='Images', epoch=0): + # Log images to all loggers + files = [Path(f) for f in (files if isinstance(files, (tuple, list)) else [files])] # to Path + files = [f for f in files if f.exists()] # filter by exists + + if self.tb: + for f in files: + self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC') + + if self.wandb: + self.wandb.log({name: [wandb.Image(str(f), caption=f.name) for f in files]}, step=epoch) + + def log_graph(self, model, imgsz=(640, 640)): + # Log model graph to all loggers + if self.tb: + log_tensorboard_graph(self.tb, model, imgsz) + + def log_model(self, model_path, epoch=0, metadata={}): + # Log model to all loggers + if self.wandb: + art = wandb.Artifact(name=f"run_{wandb.run.id}_model", type="model", metadata=metadata) + art.add_file(str(model_path)) + wandb.log_artifact(art) + + def update_params(self, params): + # Update the paramters logged + if self.wandb: + wandb.run.config.update(params, allow_val_change=True) + + +def log_tensorboard_graph(tb, model, imgsz=(640, 640)): + # Log model graph to TensorBoard + try: + p = next(model.parameters()) # for device, type + imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz # expand + im = torch.zeros((1, 3, *imgsz)).to(p.device).type_as(p) # input image (WARNING: must be zeros, not empty) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress jit trace warning + tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), []) + except Exception as e: + LOGGER.warning(f'WARNING ⚠️ TensorBoard graph visualization failure {e}') + + +def web_project_name(project): + # Convert local project name to web project name + if not project.startswith('runs/train'): + return project + suffix = '-Classify' if project.endswith('-cls') else '-Segment' if project.endswith('-seg') else '' + return f'YOLO{suffix}' diff --git a/utils/loggers/clearml/__init__.py b/utils/loggers/clearml/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..84952a8167bc2975913a6def6b4f027d566552a9 --- /dev/null +++ b/utils/loggers/clearml/__init__.py @@ -0,0 +1 @@ +# init \ No newline at end of file diff --git a/utils/loggers/clearml/clearml_utils.py b/utils/loggers/clearml/clearml_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fe5f597a87a635b15dbfe5d7ed5a6c285ebff6bd --- /dev/null +++ b/utils/loggers/clearml/clearml_utils.py @@ -0,0 +1,157 @@ +"""Main Logger class for ClearML experiment tracking.""" +import glob +import re +from pathlib import Path + +import numpy as np +import yaml + +from utils.plots import Annotator, colors + +try: + import clearml + from clearml import Dataset, Task + + assert hasattr(clearml, '__version__') # verify package import not local dir +except (ImportError, AssertionError): + clearml = None + + +def construct_dataset(clearml_info_string): + """Load in a clearml dataset and fill the internal data_dict with its contents. + """ + dataset_id = clearml_info_string.replace('clearml://', '') + dataset = Dataset.get(dataset_id=dataset_id) + dataset_root_path = Path(dataset.get_local_copy()) + + # We'll search for the yaml file definition in the dataset + yaml_filenames = list(glob.glob(str(dataset_root_path / "*.yaml")) + glob.glob(str(dataset_root_path / "*.yml"))) + if len(yaml_filenames) > 1: + raise ValueError('More than one yaml file was found in the dataset root, cannot determine which one contains ' + 'the dataset definition this way.') + elif len(yaml_filenames) == 0: + raise ValueError('No yaml definition found in dataset root path, check that there is a correct yaml file ' + 'inside the dataset root path.') + with open(yaml_filenames[0]) as f: + dataset_definition = yaml.safe_load(f) + + assert set(dataset_definition.keys()).issuperset( + {'train', 'test', 'val', 'nc', 'names'} + ), "The right keys were not found in the yaml file, make sure it at least has the following keys: ('train', 'test', 'val', 'nc', 'names')" + + data_dict = dict() + data_dict['train'] = str( + (dataset_root_path / dataset_definition['train']).resolve()) if dataset_definition['train'] else None + data_dict['test'] = str( + (dataset_root_path / dataset_definition['test']).resolve()) if dataset_definition['test'] else None + data_dict['val'] = str( + (dataset_root_path / dataset_definition['val']).resolve()) if dataset_definition['val'] else None + data_dict['nc'] = dataset_definition['nc'] + data_dict['names'] = dataset_definition['names'] + + return data_dict + + +class ClearmlLogger: + """Log training runs, datasets, models, and predictions to ClearML. + + This logger sends information to ClearML at app.clear.ml or to your own hosted server. By default, + this information includes hyperparameters, system configuration and metrics, model metrics, code information and + basic data metrics and analyses. + + By providing additional command line arguments to train.py, datasets, + models and predictions can also be logged. + """ + + def __init__(self, opt, hyp): + """ + - Initialize ClearML Task, this object will capture the experiment + - Upload dataset version to ClearML Data if opt.upload_dataset is True + + arguments: + opt (namespace) -- Commandline arguments for this run + hyp (dict) -- Hyperparameters for this run + + """ + self.current_epoch = 0 + # Keep tracked of amount of logged images to enforce a limit + self.current_epoch_logged_images = set() + # Maximum number of images to log to clearML per epoch + self.max_imgs_to_log_per_epoch = 16 + # Get the interval of epochs when bounding box images should be logged + self.bbox_interval = opt.bbox_interval + self.clearml = clearml + self.task = None + self.data_dict = None + if self.clearml: + self.task = Task.init( + project_name=opt.project if opt.project != 'runs/train' else 'YOLOv5', + task_name=opt.name if opt.name != 'exp' else 'Training', + tags=['YOLOv5'], + output_uri=True, + auto_connect_frameworks={'pytorch': False} + # We disconnect pytorch auto-detection, because we added manual model save points in the code + ) + # ClearML's hooks will already grab all general parameters + # Only the hyperparameters coming from the yaml config file + # will have to be added manually! + self.task.connect(hyp, name='Hyperparameters') + + # Get ClearML Dataset Version if requested + if opt.data.startswith('clearml://'): + # data_dict should have the following keys: + # names, nc (number of classes), test, train, val (all three relative paths to ../datasets) + self.data_dict = construct_dataset(opt.data) + # Set data to data_dict because wandb will crash without this information and opt is the best way + # to give it to them + opt.data = self.data_dict + + def log_debug_samples(self, files, title='Debug Samples'): + """ + Log files (images) as debug samples in the ClearML task. + + arguments: + files (List(PosixPath)) a list of file paths in PosixPath format + title (str) A title that groups together images with the same values + """ + for f in files: + if f.exists(): + it = re.search(r'_batch(\d+)', f.name) + iteration = int(it.groups()[0]) if it else 0 + self.task.get_logger().report_image(title=title, + series=f.name.replace(it.group(), ''), + local_path=str(f), + iteration=iteration) + + def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_threshold=0.25): + """ + Draw the bounding boxes on a single image and report the result as a ClearML debug sample. + + arguments: + image_path (PosixPath) the path the original image file + boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class] + class_names (dict): dict containing mapping of class int to class name + image (Tensor): A torch tensor containing the actual image data + """ + if len(self.current_epoch_logged_images) < self.max_imgs_to_log_per_epoch and self.current_epoch >= 0: + # Log every bbox_interval times and deduplicate for any intermittend extra eval runs + if self.current_epoch % self.bbox_interval == 0 and image_path not in self.current_epoch_logged_images: + im = np.ascontiguousarray(np.moveaxis(image.mul(255).clamp(0, 255).byte().cpu().numpy(), 0, 2)) + annotator = Annotator(im=im, pil=True) + for i, (conf, class_nr, box) in enumerate(zip(boxes[:, 4], boxes[:, 5], boxes[:, :4])): + color = colors(i) + + class_name = class_names[int(class_nr)] + confidence_percentage = round(float(conf) * 100, 2) + label = f"{class_name}: {confidence_percentage}%" + + if conf > conf_threshold: + annotator.rectangle(box.cpu().numpy(), outline=color) + annotator.box_label(box.cpu().numpy(), label=label, color=color) + + annotated_image = annotator.result() + self.task.get_logger().report_image(title='Bounding Boxes', + series=image_path.name, + iteration=self.current_epoch, + image=annotated_image) + self.current_epoch_logged_images.add(image_path) diff --git a/utils/loggers/clearml/hpo.py b/utils/loggers/clearml/hpo.py new file mode 100644 index 0000000000000000000000000000000000000000..ee518b0fbfc89ee811b51bbf85341eee4f685be1 --- /dev/null +++ b/utils/loggers/clearml/hpo.py @@ -0,0 +1,84 @@ +from clearml import Task +# Connecting ClearML with the current process, +# from here on everything is logged automatically +from clearml.automation import HyperParameterOptimizer, UniformParameterRange +from clearml.automation.optuna import OptimizerOptuna + +task = Task.init(project_name='Hyper-Parameter Optimization', + task_name='YOLOv5', + task_type=Task.TaskTypes.optimizer, + reuse_last_task_id=False) + +# Example use case: +optimizer = HyperParameterOptimizer( + # This is the experiment we want to optimize + base_task_id='', + # here we define the hyper-parameters to optimize + # Notice: The parameter name should exactly match what you see in the UI: / + # For Example, here we see in the base experiment a section Named: "General" + # under it a parameter named "batch_size", this becomes "General/batch_size" + # If you have `argparse` for example, then arguments will appear under the "Args" section, + # and you should instead pass "Args/batch_size" + hyper_parameters=[ + UniformParameterRange('Hyperparameters/lr0', min_value=1e-5, max_value=1e-1), + UniformParameterRange('Hyperparameters/lrf', min_value=0.01, max_value=1.0), + UniformParameterRange('Hyperparameters/momentum', min_value=0.6, max_value=0.98), + UniformParameterRange('Hyperparameters/weight_decay', min_value=0.0, max_value=0.001), + UniformParameterRange('Hyperparameters/warmup_epochs', min_value=0.0, max_value=5.0), + UniformParameterRange('Hyperparameters/warmup_momentum', min_value=0.0, max_value=0.95), + UniformParameterRange('Hyperparameters/warmup_bias_lr', min_value=0.0, max_value=0.2), + UniformParameterRange('Hyperparameters/box', min_value=0.02, max_value=0.2), + UniformParameterRange('Hyperparameters/cls', min_value=0.2, max_value=4.0), + UniformParameterRange('Hyperparameters/cls_pw', min_value=0.5, max_value=2.0), + UniformParameterRange('Hyperparameters/obj', min_value=0.2, max_value=4.0), + UniformParameterRange('Hyperparameters/obj_pw', min_value=0.5, max_value=2.0), + UniformParameterRange('Hyperparameters/iou_t', min_value=0.1, max_value=0.7), + UniformParameterRange('Hyperparameters/anchor_t', min_value=2.0, max_value=8.0), + UniformParameterRange('Hyperparameters/fl_gamma', min_value=0.0, max_value=4.0), + UniformParameterRange('Hyperparameters/hsv_h', min_value=0.0, max_value=0.1), + UniformParameterRange('Hyperparameters/hsv_s', min_value=0.0, max_value=0.9), + UniformParameterRange('Hyperparameters/hsv_v', min_value=0.0, max_value=0.9), + UniformParameterRange('Hyperparameters/degrees', min_value=0.0, max_value=45.0), + UniformParameterRange('Hyperparameters/translate', min_value=0.0, max_value=0.9), + UniformParameterRange('Hyperparameters/scale', min_value=0.0, max_value=0.9), + UniformParameterRange('Hyperparameters/shear', min_value=0.0, max_value=10.0), + UniformParameterRange('Hyperparameters/perspective', min_value=0.0, max_value=0.001), + UniformParameterRange('Hyperparameters/flipud', min_value=0.0, max_value=1.0), + UniformParameterRange('Hyperparameters/fliplr', min_value=0.0, max_value=1.0), + UniformParameterRange('Hyperparameters/mosaic', min_value=0.0, max_value=1.0), + UniformParameterRange('Hyperparameters/mixup', min_value=0.0, max_value=1.0), + UniformParameterRange('Hyperparameters/copy_paste', min_value=0.0, max_value=1.0)], + # this is the objective metric we want to maximize/minimize + objective_metric_title='metrics', + objective_metric_series='mAP_0.5', + # now we decide if we want to maximize it or minimize it (accuracy we maximize) + objective_metric_sign='max', + # let us limit the number of concurrent experiments, + # this in turn will make sure we do dont bombard the scheduler with experiments. + # if we have an auto-scaler connected, this, by proxy, will limit the number of machine + max_number_of_concurrent_tasks=1, + # this is the optimizer class (actually doing the optimization) + # Currently, we can choose from GridSearch, RandomSearch or OptimizerBOHB (Bayesian optimization Hyper-Band) + optimizer_class=OptimizerOptuna, + # If specified only the top K performing Tasks will be kept, the others will be automatically archived + save_top_k_tasks_only=5, # 5, + compute_time_limit=None, + total_max_jobs=20, + min_iteration_per_job=None, + max_iteration_per_job=None, +) + +# report every 10 seconds, this is way too often, but we are testing here +optimizer.set_report_period(10 / 60) +# You can also use the line below instead to run all the optimizer tasks locally, without using queues or agent +# an_optimizer.start_locally(job_complete_callback=job_complete_callback) +# set the time limit for the optimization process (2 hours) +optimizer.set_time_limit(in_minutes=120.0) +# Start the optimization process in the local environment +optimizer.start_locally() +# wait until process is done (notice we are controlling the optimization process in the background) +optimizer.wait() +# make sure background optimization stopped +optimizer.stop() + +print('We are done, good bye') diff --git a/utils/loggers/comet/__init__.py b/utils/loggers/comet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b0318f88d6a63a6ba37fd2bf7ec4869084a45966 --- /dev/null +++ b/utils/loggers/comet/__init__.py @@ -0,0 +1,508 @@ +import glob +import json +import logging +import os +import sys +from pathlib import Path + +logger = logging.getLogger(__name__) + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[3] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH + +try: + import comet_ml + + # Project Configuration + config = comet_ml.config.get_config() + COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5") +except (ModuleNotFoundError, ImportError): + comet_ml = None + COMET_PROJECT_NAME = None + +import PIL +import torch +import torchvision.transforms as T +import yaml + +from utils.dataloaders import img2label_paths +from utils.general import check_dataset, scale_boxes, xywh2xyxy +from utils.metrics import box_iou + +COMET_PREFIX = "comet://" + +COMET_MODE = os.getenv("COMET_MODE", "online") + +# Model Saving Settings +COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5") + +# Dataset Artifact Settings +COMET_UPLOAD_DATASET = os.getenv("COMET_UPLOAD_DATASET", "false").lower() == "true" + +# Evaluation Settings +COMET_LOG_CONFUSION_MATRIX = os.getenv("COMET_LOG_CONFUSION_MATRIX", "true").lower() == "true" +COMET_LOG_PREDICTIONS = os.getenv("COMET_LOG_PREDICTIONS", "true").lower() == "true" +COMET_MAX_IMAGE_UPLOADS = int(os.getenv("COMET_MAX_IMAGE_UPLOADS", 100)) + +# Confusion Matrix Settings +CONF_THRES = float(os.getenv("CONF_THRES", 0.001)) +IOU_THRES = float(os.getenv("IOU_THRES", 0.6)) + +# Batch Logging Settings +COMET_LOG_BATCH_METRICS = os.getenv("COMET_LOG_BATCH_METRICS", "false").lower() == "true" +COMET_BATCH_LOGGING_INTERVAL = os.getenv("COMET_BATCH_LOGGING_INTERVAL", 1) +COMET_PREDICTION_LOGGING_INTERVAL = os.getenv("COMET_PREDICTION_LOGGING_INTERVAL", 1) +COMET_LOG_PER_CLASS_METRICS = os.getenv("COMET_LOG_PER_CLASS_METRICS", "false").lower() == "true" + +RANK = int(os.getenv("RANK", -1)) + +to_pil = T.ToPILImage() + + +class CometLogger: + """Log metrics, parameters, source code, models and much more + with Comet + """ + + def __init__(self, opt, hyp, run_id=None, job_type="Training", **experiment_kwargs) -> None: + self.job_type = job_type + self.opt = opt + self.hyp = hyp + + # Comet Flags + self.comet_mode = COMET_MODE + + self.save_model = opt.save_period > -1 + self.model_name = COMET_MODEL_NAME + + # Batch Logging Settings + self.log_batch_metrics = COMET_LOG_BATCH_METRICS + self.comet_log_batch_interval = COMET_BATCH_LOGGING_INTERVAL + + # Dataset Artifact Settings + self.upload_dataset = self.opt.upload_dataset if self.opt.upload_dataset else COMET_UPLOAD_DATASET + self.resume = self.opt.resume + + # Default parameters to pass to Experiment objects + self.default_experiment_kwargs = { + "log_code": False, + "log_env_gpu": True, + "log_env_cpu": True, + "project_name": COMET_PROJECT_NAME,} + self.default_experiment_kwargs.update(experiment_kwargs) + self.experiment = self._get_experiment(self.comet_mode, run_id) + + self.data_dict = self.check_dataset(self.opt.data) + self.class_names = self.data_dict["names"] + self.num_classes = self.data_dict["nc"] + + self.logged_images_count = 0 + self.max_images = COMET_MAX_IMAGE_UPLOADS + + if run_id is None: + self.experiment.log_other("Created from", "YOLOv5") + if not isinstance(self.experiment, comet_ml.OfflineExperiment): + workspace, project_name, experiment_id = self.experiment.url.split("/")[-3:] + self.experiment.log_other( + "Run Path", + f"{workspace}/{project_name}/{experiment_id}", + ) + self.log_parameters(vars(opt)) + self.log_parameters(self.opt.hyp) + self.log_asset_data( + self.opt.hyp, + name="hyperparameters.json", + metadata={"type": "hyp-config-file"}, + ) + self.log_asset( + f"{self.opt.save_dir}/opt.yaml", + metadata={"type": "opt-config-file"}, + ) + + self.comet_log_confusion_matrix = COMET_LOG_CONFUSION_MATRIX + + if hasattr(self.opt, "conf_thres"): + self.conf_thres = self.opt.conf_thres + else: + self.conf_thres = CONF_THRES + if hasattr(self.opt, "iou_thres"): + self.iou_thres = self.opt.iou_thres + else: + self.iou_thres = IOU_THRES + + self.log_parameters({"val_iou_threshold": self.iou_thres, "val_conf_threshold": self.conf_thres}) + + self.comet_log_predictions = COMET_LOG_PREDICTIONS + if self.opt.bbox_interval == -1: + self.comet_log_prediction_interval = 1 if self.opt.epochs < 10 else self.opt.epochs // 10 + else: + self.comet_log_prediction_interval = self.opt.bbox_interval + + if self.comet_log_predictions: + self.metadata_dict = {} + self.logged_image_names = [] + + self.comet_log_per_class_metrics = COMET_LOG_PER_CLASS_METRICS + + self.experiment.log_others({ + "comet_mode": COMET_MODE, + "comet_max_image_uploads": COMET_MAX_IMAGE_UPLOADS, + "comet_log_per_class_metrics": COMET_LOG_PER_CLASS_METRICS, + "comet_log_batch_metrics": COMET_LOG_BATCH_METRICS, + "comet_log_confusion_matrix": COMET_LOG_CONFUSION_MATRIX, + "comet_model_name": COMET_MODEL_NAME,}) + + # Check if running the Experiment with the Comet Optimizer + if hasattr(self.opt, "comet_optimizer_id"): + self.experiment.log_other("optimizer_id", self.opt.comet_optimizer_id) + self.experiment.log_other("optimizer_objective", self.opt.comet_optimizer_objective) + self.experiment.log_other("optimizer_metric", self.opt.comet_optimizer_metric) + self.experiment.log_other("optimizer_parameters", json.dumps(self.hyp)) + + def _get_experiment(self, mode, experiment_id=None): + if mode == "offline": + if experiment_id is not None: + return comet_ml.ExistingOfflineExperiment( + previous_experiment=experiment_id, + **self.default_experiment_kwargs, + ) + + return comet_ml.OfflineExperiment(**self.default_experiment_kwargs,) + + else: + try: + if experiment_id is not None: + return comet_ml.ExistingExperiment( + previous_experiment=experiment_id, + **self.default_experiment_kwargs, + ) + + return comet_ml.Experiment(**self.default_experiment_kwargs) + + except ValueError: + logger.warning("COMET WARNING: " + "Comet credentials have not been set. " + "Comet will default to offline logging. " + "Please set your credentials to enable online logging.") + return self._get_experiment("offline", experiment_id) + + return + + def log_metrics(self, log_dict, **kwargs): + self.experiment.log_metrics(log_dict, **kwargs) + + def log_parameters(self, log_dict, **kwargs): + self.experiment.log_parameters(log_dict, **kwargs) + + def log_asset(self, asset_path, **kwargs): + self.experiment.log_asset(asset_path, **kwargs) + + def log_asset_data(self, asset, **kwargs): + self.experiment.log_asset_data(asset, **kwargs) + + def log_image(self, img, **kwargs): + self.experiment.log_image(img, **kwargs) + + def log_model(self, path, opt, epoch, fitness_score, best_model=False): + if not self.save_model: + return + + model_metadata = { + "fitness_score": fitness_score[-1], + "epochs_trained": epoch + 1, + "save_period": opt.save_period, + "total_epochs": opt.epochs,} + + model_files = glob.glob(f"{path}/*.pt") + for model_path in model_files: + name = Path(model_path).name + + self.experiment.log_model( + self.model_name, + file_or_folder=model_path, + file_name=name, + metadata=model_metadata, + overwrite=True, + ) + + def check_dataset(self, data_file): + with open(data_file) as f: + data_config = yaml.safe_load(f) + + if data_config['path'].startswith(COMET_PREFIX): + path = data_config['path'].replace(COMET_PREFIX, "") + data_dict = self.download_dataset_artifact(path) + + return data_dict + + self.log_asset(self.opt.data, metadata={"type": "data-config-file"}) + + return check_dataset(data_file) + + def log_predictions(self, image, labelsn, path, shape, predn): + if self.logged_images_count >= self.max_images: + return + detections = predn[predn[:, 4] > self.conf_thres] + iou = box_iou(labelsn[:, 1:], detections[:, :4]) + mask, _ = torch.where(iou > self.iou_thres) + if len(mask) == 0: + return + + filtered_detections = detections[mask] + filtered_labels = labelsn[mask] + + image_id = path.split("/")[-1].split(".")[0] + image_name = f"{image_id}_curr_epoch_{self.experiment.curr_epoch}" + if image_name not in self.logged_image_names: + native_scale_image = PIL.Image.open(path) + self.log_image(native_scale_image, name=image_name) + self.logged_image_names.append(image_name) + + metadata = [] + for cls, *xyxy in filtered_labels.tolist(): + metadata.append({ + "label": f"{self.class_names[int(cls)]}-gt", + "score": 100, + "box": { + "x": xyxy[0], + "y": xyxy[1], + "x2": xyxy[2], + "y2": xyxy[3]},}) + for *xyxy, conf, cls in filtered_detections.tolist(): + metadata.append({ + "label": f"{self.class_names[int(cls)]}", + "score": conf * 100, + "box": { + "x": xyxy[0], + "y": xyxy[1], + "x2": xyxy[2], + "y2": xyxy[3]},}) + + self.metadata_dict[image_name] = metadata + self.logged_images_count += 1 + + return + + def preprocess_prediction(self, image, labels, shape, pred): + nl, _ = labels.shape[0], pred.shape[0] + + # Predictions + if self.opt.single_cls: + pred[:, 5] = 0 + + predn = pred.clone() + scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1]) + + labelsn = None + if nl: + tbox = xywh2xyxy(labels[:, 1:5]) # target boxes + scale_boxes(image.shape[1:], tbox, shape[0], shape[1]) # native-space labels + labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels + scale_boxes(image.shape[1:], predn[:, :4], shape[0], shape[1]) # native-space pred + + return predn, labelsn + + def add_assets_to_artifact(self, artifact, path, asset_path, split): + img_paths = sorted(glob.glob(f"{asset_path}/*")) + label_paths = img2label_paths(img_paths) + + for image_file, label_file in zip(img_paths, label_paths): + image_logical_path, label_logical_path = map(lambda x: os.path.relpath(x, path), [image_file, label_file]) + + try: + artifact.add(image_file, logical_path=image_logical_path, metadata={"split": split}) + artifact.add(label_file, logical_path=label_logical_path, metadata={"split": split}) + except ValueError as e: + logger.error('COMET ERROR: Error adding file to Artifact. Skipping file.') + logger.error(f"COMET ERROR: {e}") + continue + + return artifact + + def upload_dataset_artifact(self): + dataset_name = self.data_dict.get("dataset_name", "yolov5-dataset") + path = str((ROOT / Path(self.data_dict["path"])).resolve()) + + metadata = self.data_dict.copy() + for key in ["train", "val", "test"]: + split_path = metadata.get(key) + if split_path is not None: + metadata[key] = split_path.replace(path, "") + + artifact = comet_ml.Artifact(name=dataset_name, artifact_type="dataset", metadata=metadata) + for key in metadata.keys(): + if key in ["train", "val", "test"]: + if isinstance(self.upload_dataset, str) and (key != self.upload_dataset): + continue + + asset_path = self.data_dict.get(key) + if asset_path is not None: + artifact = self.add_assets_to_artifact(artifact, path, asset_path, key) + + self.experiment.log_artifact(artifact) + + return + + def download_dataset_artifact(self, artifact_path): + logged_artifact = self.experiment.get_artifact(artifact_path) + artifact_save_dir = str(Path(self.opt.save_dir) / logged_artifact.name) + logged_artifact.download(artifact_save_dir) + + metadata = logged_artifact.metadata + data_dict = metadata.copy() + data_dict["path"] = artifact_save_dir + + metadata_names = metadata.get("names") + if type(metadata_names) == dict: + data_dict["names"] = {int(k): v for k, v in metadata.get("names").items()} + elif type(metadata_names) == list: + data_dict["names"] = {int(k): v for k, v in zip(range(len(metadata_names)), metadata_names)} + else: + raise "Invalid 'names' field in dataset yaml file. Please use a list or dictionary" + + data_dict = self.update_data_paths(data_dict) + return data_dict + + def update_data_paths(self, data_dict): + path = data_dict.get("path", "") + + for split in ["train", "val", "test"]: + if data_dict.get(split): + split_path = data_dict.get(split) + data_dict[split] = (f"{path}/{split_path}" if isinstance(split, str) else [ + f"{path}/{x}" for x in split_path]) + + return data_dict + + def on_pretrain_routine_end(self, paths): + if self.opt.resume: + return + + for path in paths: + self.log_asset(str(path)) + + if self.upload_dataset: + if not self.resume: + self.upload_dataset_artifact() + + return + + def on_train_start(self): + self.log_parameters(self.hyp) + + def on_train_epoch_start(self): + return + + def on_train_epoch_end(self, epoch): + self.experiment.curr_epoch = epoch + + return + + def on_train_batch_start(self): + return + + def on_train_batch_end(self, log_dict, step): + self.experiment.curr_step = step + if self.log_batch_metrics and (step % self.comet_log_batch_interval == 0): + self.log_metrics(log_dict, step=step) + + return + + def on_train_end(self, files, save_dir, last, best, epoch, results): + if self.comet_log_predictions: + curr_epoch = self.experiment.curr_epoch + self.experiment.log_asset_data(self.metadata_dict, "image-metadata.json", epoch=curr_epoch) + + for f in files: + self.log_asset(f, metadata={"epoch": epoch}) + self.log_asset(f"{save_dir}/results.csv", metadata={"epoch": epoch}) + + if not self.opt.evolve: + model_path = str(best if best.exists() else last) + name = Path(model_path).name + if self.save_model: + self.experiment.log_model( + self.model_name, + file_or_folder=model_path, + file_name=name, + overwrite=True, + ) + + # Check if running Experiment with Comet Optimizer + if hasattr(self.opt, 'comet_optimizer_id'): + metric = results.get(self.opt.comet_optimizer_metric) + self.experiment.log_other('optimizer_metric_value', metric) + + self.finish_run() + + def on_val_start(self): + return + + def on_val_batch_start(self): + return + + def on_val_batch_end(self, batch_i, images, targets, paths, shapes, outputs): + if not (self.comet_log_predictions and ((batch_i + 1) % self.comet_log_prediction_interval == 0)): + return + + for si, pred in enumerate(outputs): + if len(pred) == 0: + continue + + image = images[si] + labels = targets[targets[:, 0] == si, 1:] + shape = shapes[si] + path = paths[si] + predn, labelsn = self.preprocess_prediction(image, labels, shape, pred) + if labelsn is not None: + self.log_predictions(image, labelsn, path, shape, predn) + + return + + def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix): + if self.comet_log_per_class_metrics: + if self.num_classes > 1: + for i, c in enumerate(ap_class): + class_name = self.class_names[c] + self.experiment.log_metrics( + { + 'mAP@.5': ap50[i], + 'mAP@.5:.95': ap[i], + 'precision': p[i], + 'recall': r[i], + 'f1': f1[i], + 'true_positives': tp[i], + 'false_positives': fp[i], + 'support': nt[c]}, + prefix=class_name) + + if self.comet_log_confusion_matrix: + epoch = self.experiment.curr_epoch + class_names = list(self.class_names.values()) + class_names.append("background") + num_classes = len(class_names) + + self.experiment.log_confusion_matrix( + matrix=confusion_matrix.matrix, + max_categories=num_classes, + labels=class_names, + epoch=epoch, + column_label='Actual Category', + row_label='Predicted Category', + file_name=f"confusion-matrix-epoch-{epoch}.json", + ) + + def on_fit_epoch_end(self, result, epoch): + self.log_metrics(result, epoch=epoch) + + def on_model_save(self, last, epoch, final_epoch, best_fitness, fi): + if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1: + self.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi) + + def on_params_update(self, params): + self.log_parameters(params) + + def finish_run(self): + self.experiment.end() diff --git a/utils/loggers/comet/comet_utils.py b/utils/loggers/comet/comet_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3cbd45156b576d09024fd11ea9dce83d4a6e5143 --- /dev/null +++ b/utils/loggers/comet/comet_utils.py @@ -0,0 +1,150 @@ +import logging +import os +from urllib.parse import urlparse + +try: + import comet_ml +except (ModuleNotFoundError, ImportError): + comet_ml = None + +import yaml + +logger = logging.getLogger(__name__) + +COMET_PREFIX = "comet://" +COMET_MODEL_NAME = os.getenv("COMET_MODEL_NAME", "yolov5") +COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv("COMET_DEFAULT_CHECKPOINT_FILENAME", "last.pt") + + +def download_model_checkpoint(opt, experiment): + model_dir = f"{opt.project}/{experiment.name}" + os.makedirs(model_dir, exist_ok=True) + + model_name = COMET_MODEL_NAME + model_asset_list = experiment.get_model_asset_list(model_name) + + if len(model_asset_list) == 0: + logger.error(f"COMET ERROR: No checkpoints found for model name : {model_name}") + return + + model_asset_list = sorted( + model_asset_list, + key=lambda x: x["step"], + reverse=True, + ) + logged_checkpoint_map = {asset["fileName"]: asset["assetId"] for asset in model_asset_list} + + resource_url = urlparse(opt.weights) + checkpoint_filename = resource_url.query + + if checkpoint_filename: + asset_id = logged_checkpoint_map.get(checkpoint_filename) + else: + asset_id = logged_checkpoint_map.get(COMET_DEFAULT_CHECKPOINT_FILENAME) + checkpoint_filename = COMET_DEFAULT_CHECKPOINT_FILENAME + + if asset_id is None: + logger.error(f"COMET ERROR: Checkpoint {checkpoint_filename} not found in the given Experiment") + return + + try: + logger.info(f"COMET INFO: Downloading checkpoint {checkpoint_filename}") + asset_filename = checkpoint_filename + + model_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) + model_download_path = f"{model_dir}/{asset_filename}" + with open(model_download_path, "wb") as f: + f.write(model_binary) + + opt.weights = model_download_path + + except Exception as e: + logger.warning("COMET WARNING: Unable to download checkpoint from Comet") + logger.exception(e) + + +def set_opt_parameters(opt, experiment): + """Update the opts Namespace with parameters + from Comet's ExistingExperiment when resuming a run + + Args: + opt (argparse.Namespace): Namespace of command line options + experiment (comet_ml.APIExperiment): Comet API Experiment object + """ + asset_list = experiment.get_asset_list() + resume_string = opt.resume + + for asset in asset_list: + if asset["fileName"] == "opt.yaml": + asset_id = asset["assetId"] + asset_binary = experiment.get_asset(asset_id, return_type="binary", stream=False) + opt_dict = yaml.safe_load(asset_binary) + for key, value in opt_dict.items(): + setattr(opt, key, value) + opt.resume = resume_string + + # Save hyperparameters to YAML file + # Necessary to pass checks in training script + save_dir = f"{opt.project}/{experiment.name}" + os.makedirs(save_dir, exist_ok=True) + + hyp_yaml_path = f"{save_dir}/hyp.yaml" + with open(hyp_yaml_path, "w") as f: + yaml.dump(opt.hyp, f) + opt.hyp = hyp_yaml_path + + +def check_comet_weights(opt): + """Downloads model weights from Comet and updates the + weights path to point to saved weights location + + Args: + opt (argparse.Namespace): Command Line arguments passed + to YOLOv5 training script + + Returns: + None/bool: Return True if weights are successfully downloaded + else return None + """ + if comet_ml is None: + return + + if isinstance(opt.weights, str): + if opt.weights.startswith(COMET_PREFIX): + api = comet_ml.API() + resource = urlparse(opt.weights) + experiment_path = f"{resource.netloc}{resource.path}" + experiment = api.get(experiment_path) + download_model_checkpoint(opt, experiment) + return True + + return None + + +def check_comet_resume(opt): + """Restores run parameters to its original state based on the model checkpoint + and logged Experiment parameters. + + Args: + opt (argparse.Namespace): Command Line arguments passed + to YOLOv5 training script + + Returns: + None/bool: Return True if the run is restored successfully + else return None + """ + if comet_ml is None: + return + + if isinstance(opt.resume, str): + if opt.resume.startswith(COMET_PREFIX): + api = comet_ml.API() + resource = urlparse(opt.resume) + experiment_path = f"{resource.netloc}{resource.path}" + experiment = api.get(experiment_path) + set_opt_parameters(opt, experiment) + download_model_checkpoint(opt, experiment) + + return True + + return None diff --git a/utils/loggers/comet/hpo.py b/utils/loggers/comet/hpo.py new file mode 100644 index 0000000000000000000000000000000000000000..7dd5c92e8de170222b3cd3eae858f4f3cfddaff6 --- /dev/null +++ b/utils/loggers/comet/hpo.py @@ -0,0 +1,118 @@ +import argparse +import json +import logging +import os +import sys +from pathlib import Path + +import comet_ml + +logger = logging.getLogger(__name__) + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[3] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH + +from train import train +from utils.callbacks import Callbacks +from utils.general import increment_path +from utils.torch_utils import select_device + +# Project Configuration +config = comet_ml.config.get_config() +COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.project_name", default="yolov5") + + +def get_args(known=False): + parser = argparse.ArgumentParser() + parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='initial weights path') + parser.add_argument('--cfg', type=str, default='', help='model.yaml path') + parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path') + parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch-low.yaml', help='hyperparameters path') + parser.add_argument('--epochs', type=int, default=300, help='total training epochs') + parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') + parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)') + parser.add_argument('--rect', action='store_true', help='rectangular training') + parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') + parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') + parser.add_argument('--noval', action='store_true', help='only validate final epoch') + parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor') + parser.add_argument('--noplots', action='store_true', help='save no plot files') + parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations') + parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') + parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"') + parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') + parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') + parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') + parser.add_argument('--optimizer', type=str, choices=['SGD', 'Adam', 'AdamW'], default='SGD', help='optimizer') + parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') + parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') + parser.add_argument('--project', default=ROOT / 'runs/train', help='save to project/name') + parser.add_argument('--name', default='exp', help='save to project/name') + parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--quad', action='store_true', help='quad dataloader') + parser.add_argument('--cos-lr', action='store_true', help='cosine LR scheduler') + parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') + parser.add_argument('--patience', type=int, default=100, help='EarlyStopping patience (epochs without improvement)') + parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone=10, first3=0 1 2') + parser.add_argument('--save-period', type=int, default=-1, help='Save checkpoint every x epochs (disabled if < 1)') + parser.add_argument('--seed', type=int, default=0, help='Global training seed') + parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify') + + # Weights & Biases arguments + parser.add_argument('--entity', default=None, help='W&B: Entity') + parser.add_argument('--upload_dataset', nargs='?', const=True, default=False, help='W&B: Upload data, "val" option') + parser.add_argument('--bbox_interval', type=int, default=-1, help='W&B: Set bounding-box image logging interval') + parser.add_argument('--artifact_alias', type=str, default='latest', help='W&B: Version of dataset artifact to use') + + # Comet Arguments + parser.add_argument("--comet_optimizer_config", type=str, help="Comet: Path to a Comet Optimizer Config File.") + parser.add_argument("--comet_optimizer_id", type=str, help="Comet: ID of the Comet Optimizer sweep.") + parser.add_argument("--comet_optimizer_objective", type=str, help="Comet: Set to 'minimize' or 'maximize'.") + parser.add_argument("--comet_optimizer_metric", type=str, help="Comet: Metric to Optimize.") + parser.add_argument("--comet_optimizer_workers", + type=int, + default=1, + help="Comet: Number of Parallel Workers to use with the Comet Optimizer.") + + return parser.parse_known_args()[0] if known else parser.parse_args() + + +def run(parameters, opt): + hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]} + + opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) + opt.batch_size = parameters.get("batch_size") + opt.epochs = parameters.get("epochs") + + device = select_device(opt.device, batch_size=opt.batch_size) + train(hyp_dict, opt, device, callbacks=Callbacks()) + + +if __name__ == "__main__": + opt = get_args(known=True) + + opt.weights = str(opt.weights) + opt.cfg = str(opt.cfg) + opt.data = str(opt.data) + opt.project = str(opt.project) + + optimizer_id = os.getenv("COMET_OPTIMIZER_ID") + if optimizer_id is None: + with open(opt.comet_optimizer_config) as f: + optimizer_config = json.load(f) + optimizer = comet_ml.Optimizer(optimizer_config) + else: + optimizer = comet_ml.Optimizer(optimizer_id) + + opt.comet_optimizer_id = optimizer.id + status = optimizer.status() + + opt.comet_optimizer_objective = status["spec"]["objective"] + opt.comet_optimizer_metric = status["spec"]["metric"] + + logger.info("COMET INFO: Starting Hyperparameter Sweep") + for parameter in optimizer.get_parameters(): + run(parameter["parameters"], opt) diff --git a/utils/loggers/comet/optimizer_config.json b/utils/loggers/comet/optimizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..83ddddab6f2084b4bdf84dca1e61696de200d1b8 --- /dev/null +++ b/utils/loggers/comet/optimizer_config.json @@ -0,0 +1,209 @@ +{ + "algorithm": "random", + "parameters": { + "anchor_t": { + "type": "discrete", + "values": [ + 2, + 8 + ] + }, + "batch_size": { + "type": "discrete", + "values": [ + 16, + 32, + 64 + ] + }, + "box": { + "type": "discrete", + "values": [ + 0.02, + 0.2 + ] + }, + "cls": { + "type": "discrete", + "values": [ + 0.2 + ] + }, + "cls_pw": { + "type": "discrete", + "values": [ + 0.5 + ] + }, + "copy_paste": { + "type": "discrete", + "values": [ + 1 + ] + }, + "degrees": { + "type": "discrete", + "values": [ + 0, + 45 + ] + }, + "epochs": { + "type": "discrete", + "values": [ + 5 + ] + }, + "fl_gamma": { + "type": "discrete", + "values": [ + 0 + ] + }, + "fliplr": { + "type": "discrete", + "values": [ + 0 + ] + }, + "flipud": { + "type": "discrete", + "values": [ + 0 + ] + }, + "hsv_h": { + "type": "discrete", + "values": [ + 0 + ] + }, + "hsv_s": { + "type": "discrete", + "values": [ + 0 + ] + }, + "hsv_v": { + "type": "discrete", + "values": [ + 0 + ] + }, + "iou_t": { + "type": "discrete", + "values": [ + 0.7 + ] + }, + "lr0": { + "type": "discrete", + "values": [ + 1e-05, + 0.1 + ] + }, + "lrf": { + "type": "discrete", + "values": [ + 0.01, + 1 + ] + }, + "mixup": { + "type": "discrete", + "values": [ + 1 + ] + }, + "momentum": { + "type": "discrete", + "values": [ + 0.6 + ] + }, + "mosaic": { + "type": "discrete", + "values": [ + 0 + ] + }, + "obj": { + "type": "discrete", + "values": [ + 0.2 + ] + }, + "obj_pw": { + "type": "discrete", + "values": [ + 0.5 + ] + }, + "optimizer": { + "type": "categorical", + "values": [ + "SGD", + "Adam", + "AdamW" + ] + }, + "perspective": { + "type": "discrete", + "values": [ + 0 + ] + }, + "scale": { + "type": "discrete", + "values": [ + 0 + ] + }, + "shear": { + "type": "discrete", + "values": [ + 0 + ] + }, + "translate": { + "type": "discrete", + "values": [ + 0 + ] + }, + "warmup_bias_lr": { + "type": "discrete", + "values": [ + 0, + 0.2 + ] + }, + "warmup_epochs": { + "type": "discrete", + "values": [ + 5 + ] + }, + "warmup_momentum": { + "type": "discrete", + "values": [ + 0, + 0.95 + ] + }, + "weight_decay": { + "type": "discrete", + "values": [ + 0, + 0.001 + ] + } + }, + "spec": { + "maxCombo": 0, + "metric": "metrics/mAP_0.5", + "objective": "maximize" + }, + "trials": 1 +} diff --git a/utils/loggers/wandb/__init__.py b/utils/loggers/wandb/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..84952a8167bc2975913a6def6b4f027d566552a9 --- /dev/null +++ b/utils/loggers/wandb/__init__.py @@ -0,0 +1 @@ +# init \ No newline at end of file diff --git a/utils/loggers/wandb/log_dataset.py b/utils/loggers/wandb/log_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..06e81fb693072c99703e5c52b169892b7fd9a8cc --- /dev/null +++ b/utils/loggers/wandb/log_dataset.py @@ -0,0 +1,27 @@ +import argparse + +from wandb_utils import WandbLogger + +from utils.general import LOGGER + +WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' + + +def create_dataset_artifact(opt): + logger = WandbLogger(opt, None, job_type='Dataset Creation') # TODO: return value unused + if not logger.wandb: + LOGGER.info("install wandb using `pip install wandb` to log the dataset") + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') + parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') + parser.add_argument('--project', type=str, default='YOLOv5', help='name of W&B Project') + parser.add_argument('--entity', default=None, help='W&B entity') + parser.add_argument('--name', type=str, default='log dataset', help='name of W&B run') + + opt = parser.parse_args() + opt.resume = False # Explicitly disallow resume check for dataset upload job + + create_dataset_artifact(opt) diff --git a/utils/loggers/wandb/sweep.py b/utils/loggers/wandb/sweep.py new file mode 100644 index 0000000000000000000000000000000000000000..d49ea6f2778b2e87d0f535c2b3595ccceebab459 --- /dev/null +++ b/utils/loggers/wandb/sweep.py @@ -0,0 +1,41 @@ +import sys +from pathlib import Path + +import wandb + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[3] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH + +from train import parse_opt, train +from utils.callbacks import Callbacks +from utils.general import increment_path +from utils.torch_utils import select_device + + +def sweep(): + wandb.init() + # Get hyp dict from sweep agent. Copy because train() modifies parameters which confused wandb. + hyp_dict = vars(wandb.config).get("_items").copy() + + # Workaround: get necessary opt args + opt = parse_opt(known=True) + opt.batch_size = hyp_dict.get("batch_size") + opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve)) + opt.epochs = hyp_dict.get("epochs") + opt.nosave = True + opt.data = hyp_dict.get("data") + opt.weights = str(opt.weights) + opt.cfg = str(opt.cfg) + opt.data = str(opt.data) + opt.hyp = str(opt.hyp) + opt.project = str(opt.project) + device = select_device(opt.device, batch_size=opt.batch_size) + + # train + train(hyp_dict, opt, device, callbacks=Callbacks()) + + +if __name__ == "__main__": + sweep() diff --git a/utils/loggers/wandb/sweep.yaml b/utils/loggers/wandb/sweep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..688b1ea0285f42e779d301ba910bf4e9fe50305c --- /dev/null +++ b/utils/loggers/wandb/sweep.yaml @@ -0,0 +1,143 @@ +# Hyperparameters for training +# To set range- +# Provide min and max values as: +# parameter: +# +# min: scalar +# max: scalar +# OR +# +# Set a specific list of search space- +# parameter: +# values: [scalar1, scalar2, scalar3...] +# +# You can use grid, bayesian and hyperopt search strategy +# For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration + +program: utils/loggers/wandb/sweep.py +method: random +metric: + name: metrics/mAP_0.5 + goal: maximize + +parameters: + # hyperparameters: set either min, max range or values list + data: + value: "data/coco128.yaml" + batch_size: + values: [64] + epochs: + values: [10] + + lr0: + distribution: uniform + min: 1e-5 + max: 1e-1 + lrf: + distribution: uniform + min: 0.01 + max: 1.0 + momentum: + distribution: uniform + min: 0.6 + max: 0.98 + weight_decay: + distribution: uniform + min: 0.0 + max: 0.001 + warmup_epochs: + distribution: uniform + min: 0.0 + max: 5.0 + warmup_momentum: + distribution: uniform + min: 0.0 + max: 0.95 + warmup_bias_lr: + distribution: uniform + min: 0.0 + max: 0.2 + box: + distribution: uniform + min: 0.02 + max: 0.2 + cls: + distribution: uniform + min: 0.2 + max: 4.0 + cls_pw: + distribution: uniform + min: 0.5 + max: 2.0 + obj: + distribution: uniform + min: 0.2 + max: 4.0 + obj_pw: + distribution: uniform + min: 0.5 + max: 2.0 + iou_t: + distribution: uniform + min: 0.1 + max: 0.7 + anchor_t: + distribution: uniform + min: 2.0 + max: 8.0 + fl_gamma: + distribution: uniform + min: 0.0 + max: 4.0 + hsv_h: + distribution: uniform + min: 0.0 + max: 0.1 + hsv_s: + distribution: uniform + min: 0.0 + max: 0.9 + hsv_v: + distribution: uniform + min: 0.0 + max: 0.9 + degrees: + distribution: uniform + min: 0.0 + max: 45.0 + translate: + distribution: uniform + min: 0.0 + max: 0.9 + scale: + distribution: uniform + min: 0.0 + max: 0.9 + shear: + distribution: uniform + min: 0.0 + max: 10.0 + perspective: + distribution: uniform + min: 0.0 + max: 0.001 + flipud: + distribution: uniform + min: 0.0 + max: 1.0 + fliplr: + distribution: uniform + min: 0.0 + max: 1.0 + mosaic: + distribution: uniform + min: 0.0 + max: 1.0 + mixup: + distribution: uniform + min: 0.0 + max: 1.0 + copy_paste: + distribution: uniform + min: 0.0 + max: 1.0 diff --git a/utils/loggers/wandb/wandb_utils.py b/utils/loggers/wandb/wandb_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..238f4edbf2a0ddf34c024fbb6775c71dd19e18aa --- /dev/null +++ b/utils/loggers/wandb/wandb_utils.py @@ -0,0 +1,589 @@ +"""Utilities and tools for tracking runs with Weights & Biases.""" + +import logging +import os +import sys +from contextlib import contextmanager +from pathlib import Path +from typing import Dict + +import yaml +from tqdm import tqdm + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[3] # YOLOv5 root directory +if str(ROOT) not in sys.path: + sys.path.append(str(ROOT)) # add ROOT to PATH + +from utils.dataloaders import LoadImagesAndLabels, img2label_paths +from utils.general import LOGGER, check_dataset, check_file + +try: + import wandb + + assert hasattr(wandb, '__version__') # verify package import not local dir +except (ImportError, AssertionError): + wandb = None + +RANK = int(os.getenv('RANK', -1)) +WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' + + +def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX): + return from_string[len(prefix):] + + +def check_wandb_config_file(data_config_file): + wandb_config = '_wandb.'.join(data_config_file.rsplit('.', 1)) # updated data.yaml path + if Path(wandb_config).is_file(): + return wandb_config + return data_config_file + + +def check_wandb_dataset(data_file): + is_trainset_wandb_artifact = False + is_valset_wandb_artifact = False + if isinstance(data_file, dict): + # In that case another dataset manager has already processed it and we don't have to + return data_file + if check_file(data_file) and data_file.endswith('.yaml'): + with open(data_file, errors='ignore') as f: + data_dict = yaml.safe_load(f) + is_trainset_wandb_artifact = isinstance(data_dict['train'], + str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX) + is_valset_wandb_artifact = isinstance(data_dict['val'], + str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX) + if is_trainset_wandb_artifact or is_valset_wandb_artifact: + return data_dict + else: + return check_dataset(data_file) + + +def get_run_info(run_path): + run_path = Path(remove_prefix(run_path, WANDB_ARTIFACT_PREFIX)) + run_id = run_path.stem + project = run_path.parent.stem + entity = run_path.parent.parent.stem + model_artifact_name = 'run_' + run_id + '_model' + return entity, project, run_id, model_artifact_name + + +def check_wandb_resume(opt): + process_wandb_config_ddp_mode(opt) if RANK not in [-1, 0] else None + if isinstance(opt.resume, str): + if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): + if RANK not in [-1, 0]: # For resuming DDP runs + entity, project, run_id, model_artifact_name = get_run_info(opt.resume) + api = wandb.Api() + artifact = api.artifact(entity + '/' + project + '/' + model_artifact_name + ':latest') + modeldir = artifact.download() + opt.weights = str(Path(modeldir) / "last.pt") + return True + return None + + +def process_wandb_config_ddp_mode(opt): + with open(check_file(opt.data), errors='ignore') as f: + data_dict = yaml.safe_load(f) # data dict + train_dir, val_dir = None, None + if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX): + api = wandb.Api() + train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias) + train_dir = train_artifact.download() + train_path = Path(train_dir) / 'data/images/' + data_dict['train'] = str(train_path) + + if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX): + api = wandb.Api() + val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias) + val_dir = val_artifact.download() + val_path = Path(val_dir) / 'data/images/' + data_dict['val'] = str(val_path) + if train_dir or val_dir: + ddp_data_path = str(Path(val_dir) / 'wandb_local_data.yaml') + with open(ddp_data_path, 'w') as f: + yaml.safe_dump(data_dict, f) + opt.data = ddp_data_path + + +class WandbLogger(): + """Log training runs, datasets, models, and predictions to Weights & Biases. + + This logger sends information to W&B at wandb.ai. By default, this information + includes hyperparameters, system configuration and metrics, model metrics, + and basic data metrics and analyses. + + By providing additional command line arguments to train.py, datasets, + models and predictions can also be logged. + + For more on how this logger is used, see the Weights & Biases documentation: + https://docs.wandb.com/guides/integrations/yolov5 + """ + + def __init__(self, opt, run_id=None, job_type='Training'): + """ + - Initialize WandbLogger instance + - Upload dataset if opt.upload_dataset is True + - Setup training processes if job_type is 'Training' + + arguments: + opt (namespace) -- Commandline arguments for this run + run_id (str) -- Run ID of W&B run to be resumed + job_type (str) -- To set the job_type for this run + + """ + # Temporary-fix + if opt.upload_dataset: + opt.upload_dataset = False + # LOGGER.info("Uploading Dataset functionality is not being supported temporarily due to a bug.") + + # Pre-training routine -- + self.job_type = job_type + self.wandb, self.wandb_run = wandb, None if not wandb else wandb.run + self.val_artifact, self.train_artifact = None, None + self.train_artifact_path, self.val_artifact_path = None, None + self.result_artifact = None + self.val_table, self.result_table = None, None + self.bbox_media_panel_images = [] + self.val_table_path_map = None + self.max_imgs_to_log = 16 + self.wandb_artifact_data_dict = None + self.data_dict = None + # It's more elegant to stick to 1 wandb.init call, + # but useful config data is overwritten in the WandbLogger's wandb.init call + if isinstance(opt.resume, str): # checks resume from artifact + if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): + entity, project, run_id, model_artifact_name = get_run_info(opt.resume) + model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name + assert wandb, 'install wandb to resume wandb runs' + # Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config + self.wandb_run = wandb.init(id=run_id, + project=project, + entity=entity, + resume='allow', + allow_val_change=True) + opt.resume = model_artifact_name + elif self.wandb: + self.wandb_run = wandb.init(config=opt, + resume="allow", + project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem, + entity=opt.entity, + name=opt.name if opt.name != 'exp' else None, + job_type=job_type, + id=run_id, + allow_val_change=True) if not wandb.run else wandb.run + if self.wandb_run: + if self.job_type == 'Training': + if opt.upload_dataset: + if not opt.resume: + self.wandb_artifact_data_dict = self.check_and_upload_dataset(opt) + + if isinstance(opt.data, dict): + # This means another dataset manager has already processed the dataset info (e.g. ClearML) + # and they will have stored the already processed dict in opt.data + self.data_dict = opt.data + elif opt.resume: + # resume from artifact + if isinstance(opt.resume, str) and opt.resume.startswith(WANDB_ARTIFACT_PREFIX): + self.data_dict = dict(self.wandb_run.config.data_dict) + else: # local resume + self.data_dict = check_wandb_dataset(opt.data) + else: + self.data_dict = check_wandb_dataset(opt.data) + self.wandb_artifact_data_dict = self.wandb_artifact_data_dict or self.data_dict + + # write data_dict to config. useful for resuming from artifacts. Do this only when not resuming. + self.wandb_run.config.update({'data_dict': self.wandb_artifact_data_dict}, allow_val_change=True) + self.setup_training(opt) + + if self.job_type == 'Dataset Creation': + self.wandb_run.config.update({"upload_dataset": True}) + self.data_dict = self.check_and_upload_dataset(opt) + + def check_and_upload_dataset(self, opt): + """ + Check if the dataset format is compatible and upload it as W&B artifact + + arguments: + opt (namespace)-- Commandline arguments for current run + + returns: + Updated dataset info dictionary where local dataset paths are replaced by WAND_ARFACT_PREFIX links. + """ + assert wandb, 'Install wandb to upload dataset' + config_path = self.log_dataset_artifact(opt.data, opt.single_cls, + 'YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem) + with open(config_path, errors='ignore') as f: + wandb_data_dict = yaml.safe_load(f) + return wandb_data_dict + + def setup_training(self, opt): + """ + Setup the necessary processes for training YOLO models: + - Attempt to download model checkpoint and dataset artifacts if opt.resume stats with WANDB_ARTIFACT_PREFIX + - Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded + - Setup log_dict, initialize bbox_interval + + arguments: + opt (namespace) -- commandline arguments for this run + + """ + self.log_dict, self.current_epoch = {}, 0 + self.bbox_interval = opt.bbox_interval + if isinstance(opt.resume, str): + modeldir, _ = self.download_model_artifact(opt) + if modeldir: + self.weights = Path(modeldir) / "last.pt" + config = self.wandb_run.config + opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp, opt.imgsz = str( + self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs,\ + config.hyp, config.imgsz + data_dict = self.data_dict + if self.val_artifact is None: # If --upload_dataset is set, use the existing artifact, don't download + self.train_artifact_path, self.train_artifact = self.download_dataset_artifact( + data_dict.get('train'), opt.artifact_alias) + self.val_artifact_path, self.val_artifact = self.download_dataset_artifact( + data_dict.get('val'), opt.artifact_alias) + + if self.train_artifact_path is not None: + train_path = Path(self.train_artifact_path) / 'data/images/' + data_dict['train'] = str(train_path) + if self.val_artifact_path is not None: + val_path = Path(self.val_artifact_path) / 'data/images/' + data_dict['val'] = str(val_path) + + if self.val_artifact is not None: + self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation") + columns = ["epoch", "id", "ground truth", "prediction"] + columns.extend(self.data_dict['names']) + self.result_table = wandb.Table(columns) + self.val_table = self.val_artifact.get("val") + if self.val_table_path_map is None: + self.map_val_table_path() + if opt.bbox_interval == -1: + self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1 + if opt.evolve or opt.noplots: + self.bbox_interval = opt.bbox_interval = opt.epochs + 1 # disable bbox_interval + train_from_artifact = self.train_artifact_path is not None and self.val_artifact_path is not None + # Update the the data_dict to point to local artifacts dir + if train_from_artifact: + self.data_dict = data_dict + + def download_dataset_artifact(self, path, alias): + """ + download the model checkpoint artifact if the path starts with WANDB_ARTIFACT_PREFIX + + arguments: + path -- path of the dataset to be used for training + alias (str)-- alias of the artifact to be download/used for training + + returns: + (str, wandb.Artifact) -- path of the downladed dataset and it's corresponding artifact object if dataset + is found otherwise returns (None, None) + """ + if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX): + artifact_path = Path(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias) + dataset_artifact = wandb.use_artifact(artifact_path.as_posix().replace("\\", "/")) + assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'" + datadir = dataset_artifact.download() + return datadir, dataset_artifact + return None, None + + def download_model_artifact(self, opt): + """ + download the model checkpoint artifact if the resume path starts with WANDB_ARTIFACT_PREFIX + + arguments: + opt (namespace) -- Commandline arguments for this run + """ + if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): + model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest") + assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist' + modeldir = model_artifact.download() + # epochs_trained = model_artifact.metadata.get('epochs_trained') + total_epochs = model_artifact.metadata.get('total_epochs') + is_finished = total_epochs is None + assert not is_finished, 'training is finished, can only resume incomplete runs.' + return modeldir, model_artifact + return None, None + + def log_model(self, path, opt, epoch, fitness_score, best_model=False): + """ + Log the model checkpoint as W&B artifact + + arguments: + path (Path) -- Path of directory containing the checkpoints + opt (namespace) -- Command line arguments for this run + epoch (int) -- Current epoch number + fitness_score (float) -- fitness score for current epoch + best_model (boolean) -- Boolean representing if the current checkpoint is the best yet. + """ + model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model', + type='model', + metadata={ + 'original_url': str(path), + 'epochs_trained': epoch + 1, + 'save period': opt.save_period, + 'project': opt.project, + 'total_epochs': opt.epochs, + 'fitness_score': fitness_score}) + model_artifact.add_file(str(path / 'last.pt'), name='last.pt') + wandb.log_artifact(model_artifact, + aliases=['latest', 'last', 'epoch ' + str(self.current_epoch), 'best' if best_model else '']) + LOGGER.info(f"Saving model artifact on epoch {epoch + 1}") + + def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False): + """ + Log the dataset as W&B artifact and return the new data file with W&B links + + arguments: + data_file (str) -- the .yaml file with information about the dataset like - path, classes etc. + single_class (boolean) -- train multi-class data as single-class + project (str) -- project name. Used to construct the artifact path + overwrite_config (boolean) -- overwrites the data.yaml file if set to true otherwise creates a new + file with _wandb postfix. Eg -> data_wandb.yaml + + returns: + the new .yaml file with artifact links. it can be used to start training directly from artifacts + """ + upload_dataset = self.wandb_run.config.upload_dataset + log_val_only = isinstance(upload_dataset, str) and upload_dataset == 'val' + self.data_dict = check_dataset(data_file) # parse and check + data = dict(self.data_dict) + nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names']) + names = {k: v for k, v in enumerate(names)} # to index dictionary + + # log train set + if not log_val_only: + self.train_artifact = self.create_dataset_table(LoadImagesAndLabels(data['train'], rect=True, batch_size=1), + names, + name='train') if data.get('train') else None + if data.get('train'): + data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train') + + self.val_artifact = self.create_dataset_table( + LoadImagesAndLabels(data['val'], rect=True, batch_size=1), names, name='val') if data.get('val') else None + if data.get('val'): + data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val') + + path = Path(data_file) + # create a _wandb.yaml file with artifacts links if both train and test set are logged + if not log_val_only: + path = (path.stem if overwrite_config else path.stem + '_wandb') + '.yaml' # updated data.yaml path + path = ROOT / 'data' / path + data.pop('download', None) + data.pop('path', None) + with open(path, 'w') as f: + yaml.safe_dump(data, f) + LOGGER.info(f"Created dataset config file {path}") + + if self.job_type == 'Training': # builds correct artifact pipeline graph + if not log_val_only: + self.wandb_run.log_artifact( + self.train_artifact) # calling use_artifact downloads the dataset. NOT NEEDED! + self.wandb_run.use_artifact(self.val_artifact) + self.val_artifact.wait() + self.val_table = self.val_artifact.get('val') + self.map_val_table_path() + else: + self.wandb_run.log_artifact(self.train_artifact) + self.wandb_run.log_artifact(self.val_artifact) + return path + + def map_val_table_path(self): + """ + Map the validation dataset Table like name of file -> it's id in the W&B Table. + Useful for - referencing artifacts for evaluation. + """ + self.val_table_path_map = {} + LOGGER.info("Mapping dataset") + for i, data in enumerate(tqdm(self.val_table.data)): + self.val_table_path_map[data[3]] = data[0] + + def create_dataset_table(self, dataset: LoadImagesAndLabels, class_to_id: Dict[int, str], name: str = 'dataset'): + """ + Create and return W&B artifact containing W&B Table of the dataset. + + arguments: + dataset -- instance of LoadImagesAndLabels class used to iterate over the data to build Table + class_to_id -- hash map that maps class ids to labels + name -- name of the artifact + + returns: + dataset artifact to be logged or used + """ + # TODO: Explore multiprocessing to slpit this loop parallely| This is essential for speeding up the the logging + artifact = wandb.Artifact(name=name, type="dataset") + img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None + img_files = tqdm(dataset.im_files) if not img_files else img_files + for img_file in img_files: + if Path(img_file).is_dir(): + artifact.add_dir(img_file, name='data/images') + labels_path = 'labels'.join(dataset.path.rsplit('images', 1)) + artifact.add_dir(labels_path, name='data/labels') + else: + artifact.add_file(img_file, name='data/images/' + Path(img_file).name) + label_file = Path(img2label_paths([img_file])[0]) + artifact.add_file(str(label_file), name='data/labels/' + + label_file.name) if label_file.exists() else None + table = wandb.Table(columns=["id", "train_image", "Classes", "name"]) + class_set = wandb.Classes([{'id': id, 'name': name} for id, name in class_to_id.items()]) + for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)): + box_data, img_classes = [], {} + for cls, *xywh in labels[:, 1:].tolist(): + cls = int(cls) + box_data.append({ + "position": { + "middle": [xywh[0], xywh[1]], + "width": xywh[2], + "height": xywh[3]}, + "class_id": cls, + "box_caption": "%s" % (class_to_id[cls])}) + img_classes[cls] = class_to_id[cls] + boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}} # inference-space + table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), list(img_classes.values()), + Path(paths).name) + artifact.add(table, name) + return artifact + + def log_training_progress(self, predn, path, names): + """ + Build evaluation Table. Uses reference from validation dataset table. + + arguments: + predn (list): list of predictions in the native space in the format - [xmin, ymin, xmax, ymax, confidence, class] + path (str): local path of the current evaluation image + names (dict(int, str)): hash map that maps class ids to labels + """ + class_set = wandb.Classes([{'id': id, 'name': name} for id, name in names.items()]) + box_data = [] + avg_conf_per_class = [0] * len(self.data_dict['names']) + pred_class_count = {} + for *xyxy, conf, cls in predn.tolist(): + if conf >= 0.25: + cls = int(cls) + box_data.append({ + "position": { + "minX": xyxy[0], + "minY": xyxy[1], + "maxX": xyxy[2], + "maxY": xyxy[3]}, + "class_id": cls, + "box_caption": f"{names[cls]} {conf:.3f}", + "scores": { + "class_score": conf}, + "domain": "pixel"}) + avg_conf_per_class[cls] += conf + + if cls in pred_class_count: + pred_class_count[cls] += 1 + else: + pred_class_count[cls] = 1 + + for pred_class in pred_class_count.keys(): + avg_conf_per_class[pred_class] = avg_conf_per_class[pred_class] / pred_class_count[pred_class] + + boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space + id = self.val_table_path_map[Path(path).name] + self.result_table.add_data(self.current_epoch, id, self.val_table.data[id][1], + wandb.Image(self.val_table.data[id][1], boxes=boxes, classes=class_set), + *avg_conf_per_class) + + def val_one_image(self, pred, predn, path, names, im): + """ + Log validation data for one image. updates the result Table if validation dataset is uploaded and log bbox media panel + + arguments: + pred (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class] + predn (list): list of predictions in the native space - [xmin, ymin, xmax, ymax, confidence, class] + path (str): local path of the current evaluation image + """ + if self.val_table and self.result_table: # Log Table if Val dataset is uploaded as artifact + self.log_training_progress(predn, path, names) + + if len(self.bbox_media_panel_images) < self.max_imgs_to_log and self.current_epoch > 0: + if self.current_epoch % self.bbox_interval == 0: + box_data = [{ + "position": { + "minX": xyxy[0], + "minY": xyxy[1], + "maxX": xyxy[2], + "maxY": xyxy[3]}, + "class_id": int(cls), + "box_caption": f"{names[int(cls)]} {conf:.3f}", + "scores": { + "class_score": conf}, + "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()] + boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space + self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes, caption=path.name)) + + def log(self, log_dict): + """ + save the metrics to the logging dictionary + + arguments: + log_dict (Dict) -- metrics/media to be logged in current step + """ + if self.wandb_run: + for key, value in log_dict.items(): + self.log_dict[key] = value + + def end_epoch(self, best_result=False): + """ + commit the log_dict, model artifacts and Tables to W&B and flush the log_dict. + + arguments: + best_result (boolean): Boolean representing if the result of this evaluation is best or not + """ + if self.wandb_run: + with all_logging_disabled(): + if self.bbox_media_panel_images: + self.log_dict["BoundingBoxDebugger"] = self.bbox_media_panel_images + try: + wandb.log(self.log_dict) + except BaseException as e: + LOGGER.info( + f"An error occurred in wandb logger. The training will proceed without interruption. More info\n{e}" + ) + self.wandb_run.finish() + self.wandb_run = None + + self.log_dict = {} + self.bbox_media_panel_images = [] + if self.result_artifact: + self.result_artifact.add(self.result_table, 'result') + wandb.log_artifact(self.result_artifact, + aliases=[ + 'latest', 'last', 'epoch ' + str(self.current_epoch), + ('best' if best_result else '')]) + + wandb.log({"evaluation": self.result_table}) + columns = ["epoch", "id", "ground truth", "prediction"] + columns.extend(self.data_dict['names']) + self.result_table = wandb.Table(columns) + self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation") + + def finish_run(self): + """ + Log metrics if any and finish the current W&B run + """ + if self.wandb_run: + if self.log_dict: + with all_logging_disabled(): + wandb.log(self.log_dict) + wandb.run.finish() + + +@contextmanager +def all_logging_disabled(highest_level=logging.CRITICAL): + """ source - https://gist.github.com/simon-weber/7853144 + A context manager that will prevent any logging messages triggered during the body from being processed. + :param highest_level: the maximum logging level in use. + This would only need to be changed if a custom level greater than CRITICAL is defined. + """ + previous_level = logging.root.manager.disable + logging.disable(highest_level) + try: + yield + finally: + logging.disable(previous_level) diff --git a/utils/loss.py b/utils/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..0ec21f8ae7950656084b0529343922ed74d4e4df --- /dev/null +++ b/utils/loss.py @@ -0,0 +1,363 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from utils.metrics import bbox_iou +from utils.torch_utils import de_parallel + + +def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 + # return positive, negative label smoothing BCE targets + return 1.0 - 0.5 * eps, 0.5 * eps + + +class BCEBlurWithLogitsLoss(nn.Module): + # BCEwithLogitLoss() with reduced missing label effects. + def __init__(self, alpha=0.05): + super().__init__() + self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() + self.alpha = alpha + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + pred = torch.sigmoid(pred) # prob from logits + dx = pred - true # reduce only missing label effects + # dx = (pred - true).abs() # reduce missing label and false label effects + alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) + loss *= alpha_factor + return loss.mean() + + +class FocalLoss(nn.Module): + # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): + super().__init__() + self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() + self.gamma = gamma + self.alpha = alpha + self.reduction = loss_fcn.reduction + self.loss_fcn.reduction = 'none' # required to apply FL to each element + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + # p_t = torch.exp(-loss) + # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability + + # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py + pred_prob = torch.sigmoid(pred) # prob from logits + p_t = true * pred_prob + (1 - true) * (1 - pred_prob) + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = (1.0 - p_t) ** self.gamma + loss *= alpha_factor * modulating_factor + + if self.reduction == 'mean': + return loss.mean() + elif self.reduction == 'sum': + return loss.sum() + else: # 'none' + return loss + + +class QFocalLoss(nn.Module): + # Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): + super().__init__() + self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() + self.gamma = gamma + self.alpha = alpha + self.reduction = loss_fcn.reduction + self.loss_fcn.reduction = 'none' # required to apply FL to each element + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + + pred_prob = torch.sigmoid(pred) # prob from logits + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = torch.abs(true - pred_prob) ** self.gamma + loss *= alpha_factor * modulating_factor + + if self.reduction == 'mean': + return loss.mean() + elif self.reduction == 'sum': + return loss.sum() + else: # 'none' + return loss + + +class ComputeLoss: + sort_obj_iou = False + + # Compute losses + def __init__(self, model, autobalance=False): + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) + BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets + + # Focal loss + g = h['fl_gamma'] # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.anchors = m.anchors + self.device = device + + def __call__(self, p, targets): # predictions, targets + bs = p[0].shape[0] # batch size + loss = torch.zeros(3, device=self.device) # [box, obj, cls] losses + tcls, tbox, indices = self.build_targets(p, targets) # targets + + # Losses + for i, pi in enumerate(p): # layer index, layer predictions + b, gj, gi = indices[i] # image, anchor, gridy, gridx + tobj = torch.zeros((pi.shape[0], pi.shape[2], pi.shape[3]), dtype=pi.dtype, device=self.device) # tgt obj + + n_labels = b.shape[0] # number of labels + if n_labels: + # pxy, pwh, _, pcls = pi[b, a, gj, gi].tensor_split((2, 4, 5), dim=1) # faster, requires torch 1.8.0 + pxy, pwh, _, pcls = pi[b, :, gj, gi].split((2, 2, 1, self.nc), 1) # target-subset of predictions + + # Regression + # pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i] + # pwh = (0.0 + (pwh - 1.09861).sigmoid() * 4) * anchors[i] + # pwh = (0.33333 + (pwh - 1.09861).sigmoid() * 2.66667) * anchors[i] + # pwh = (0.25 + (pwh - 1.38629).sigmoid() * 3.75) * anchors[i] + # pwh = (0.20 + (pwh - 1.60944).sigmoid() * 4.8) * anchors[i] + # pwh = (0.16667 + (pwh - 1.79175).sigmoid() * 5.83333) * anchors[i] + pxy = pxy.sigmoid() * 1.6 - 0.3 + pwh = (0.2 + pwh.sigmoid() * 4.8) * self.anchors[i] + pbox = torch.cat((pxy, pwh), 1) # predicted box + iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target) + loss[0] += (1.0 - iou).mean() # box loss + + # Objectness + iou = iou.detach().clamp(0).type(tobj.dtype) + if self.sort_obj_iou: + j = iou.argsort() + b, gj, gi, iou = b[j], gj[j], gi[j], iou[j] + if self.gr < 1: + iou = (1.0 - self.gr) + self.gr * iou + tobj[b, gj, gi] = iou # iou ratio + + # Classification + if self.nc > 1: # cls loss (only if multiple classes) + t = torch.full_like(pcls, self.cn, device=self.device) # targets + t[range(n_labels), tcls[i]] = self.cp + loss[2] += self.BCEcls(pcls, t) # cls loss + + obji = self.BCEobj(pi[:, 4], tobj) + loss[1] += obji * self.balance[i] # obj loss + if self.autobalance: + self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + + if self.autobalance: + self.balance = [x / self.balance[self.ssi] for x in self.balance] + loss[0] *= self.hyp['box'] + loss[1] *= self.hyp['obj'] + loss[2] *= self.hyp['cls'] + return loss.sum() * bs, loss.detach() # [box, obj, cls] losses + + def build_targets(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + nt = targets.shape[0] # number of anchors, targets + tcls, tbox, indices = [], [], [] + gain = torch.ones(6, device=self.device) # normalized to gridspace gain + + g = 0.3 # bias + off = torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=self.device).float() * g # offsets + + for i in range(self.nl): + shape = p[i].shape + gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + t = targets * gain # shape(3,n,7) + if nt: + # Matches + r = t[..., 4:6] / self.anchors[i] # wh ratio + j = torch.max(r, 1 / r).max(1)[0] < self.hyp['anchor_t'] # compare + # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) + t = t[j] # filter + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + j, k = ((gxy % 1 < g) & (gxy > 1)).T + l, m = ((gxi % 1 < g) & (gxi > 1)).T + j = torch.stack((torch.ones_like(j), j, k, l, m)) + t = t.repeat((5, 1, 1))[j] + offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] + else: + t = targets[0] + offsets = 0 + + # Define + bc, gxy, gwh = t.chunk(3, 1) # (image, class), grid xy, grid wh + b, c = bc.long().T # image, class + gij = (gxy - offsets).long() + gi, gj = gij.T # grid indices + + # Append + indices.append((b, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, grid_y, grid_x indices + tbox.append(torch.cat((gxy - gij, gwh), 1)) # box + tcls.append(c) # class + + return tcls, tbox, indices + + +class ComputeLoss_NEW: + sort_obj_iou = False + + # Compute losses + def __init__(self, model, autobalance=False): + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) + BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets + + # Focal loss + g = h['fl_gamma'] # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.anchors = m.anchors + self.device = device + self.BCE_base = nn.BCEWithLogitsLoss(reduction='none') + + def __call__(self, p, targets): # predictions, targets + tcls, tbox, indices = self.build_targets(p, targets) # targets + bs = p[0].shape[0] # batch size + n_labels = targets.shape[0] # number of labels + loss = torch.zeros(3, device=self.device) # [box, obj, cls] losses + + # Compute all losses + all_loss = [] + for i, pi in enumerate(p): # layer index, layer predictions + b, gj, gi = indices[i] # image, anchor, gridy, gridx + if n_labels: + pxy, pwh, pobj, pcls = pi[b, :, gj, gi].split((2, 2, 1, self.nc), 2) # target-subset of predictions + + # Regression + pbox = torch.cat((pxy.sigmoid() * 1.6 - 0.3, (0.2 + pwh.sigmoid() * 4.8) * self.anchors[i]), 2) + iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(predicted_box, target_box) + obj_target = iou.detach().clamp(0).type(pi.dtype) # objectness targets + + all_loss.append([(1.0 - iou) * self.hyp['box'], + self.BCE_base(pobj.squeeze(), torch.ones_like(obj_target)) * self.hyp['obj'], + self.BCE_base(pcls, F.one_hot(tcls[i], self.nc).float()).mean(2) * self.hyp['cls'], + obj_target, + tbox[i][..., 2] > 0.0]) # valid + + # Lowest 3 losses per label + n_assign = 4 # top n matches + cat_loss = [torch.cat(x, 1) for x in zip(*all_loss)] + ij = torch.zeros_like(cat_loss[0]).bool() # top 3 mask + sum_loss = cat_loss[0] + cat_loss[2] + for col in torch.argsort(sum_loss, dim=1).T[:n_assign]: + # ij[range(n_labels), col] = True + ij[range(n_labels), col] = cat_loss[4][range(n_labels), col] + loss[0] = cat_loss[0][ij].mean() * self.nl # box loss + loss[2] = cat_loss[2][ij].mean() * self.nl # cls loss + + # Obj loss + for i, (h, pi) in enumerate(zip(ij.chunk(self.nl, 1), p)): # layer index, layer predictions + b, gj, gi = indices[i] # image, anchor, gridy, gridx + tobj = torch.zeros((pi.shape[0], pi.shape[2], pi.shape[3]), dtype=pi.dtype, device=self.device) # obj + if n_labels: # if any labels + tobj[b[h], gj[h], gi[h]] = all_loss[i][3][h] + loss[1] += self.BCEobj(pi[:, 4], tobj) * (self.balance[i] * self.hyp['obj']) + + return loss.sum() * bs, loss.detach() # [box, obj, cls] losses + + def build_targets(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + nt = targets.shape[0] # number of anchors, targets + tcls, tbox, indices = [], [], [] + gain = torch.ones(6, device=self.device) # normalized to gridspace gain + + g = 0.3 # bias + off = torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=self.device).float() # offsets + + for i in range(self.nl): + shape = p[i].shape + gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + t = targets * gain # shape(3,n,7) + if nt: + # # Matches + r = t[..., 4:6] / self.anchors[i] # wh ratio + a = torch.max(r, 1 / r).max(1)[0] < self.hyp['anchor_t'] # compare + # a = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) + # t = t[a] # filter + + # # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + j, k = ((gxy % 1 < g) & (gxy > 1)).T + l, m = ((gxi % 1 < g) & (gxi > 1)).T + j = torch.stack((torch.ones_like(j), j, k, l, m)) & a + t = t.repeat((5, 1, 1)) + offsets = torch.zeros_like(gxy)[None] + off[:, None] + t[..., 4:6][~j] = 0.0 # move unsuitable targets far away + else: + t = targets[0] + offsets = 0 + + # Define + bc, gxy, gwh = t.chunk(3, 2) # (image, class), grid xy, grid wh + b, c = bc.long().transpose(0, 2).contiguous() # image, class + gij = (gxy - offsets).long() + gi, gj = gij.transpose(0, 2).contiguous() # grid indices + + # Append + indices.append((b, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, grid_y, grid_x indices + tbox.append(torch.cat((gxy - gij, gwh), 2).permute(1, 0, 2).contiguous()) # box + tcls.append(c) # class + + # # Unique + # n1 = torch.cat((b.view(-1, 1), tbox[i].view(-1, 4)), 1).shape[0] + # n2 = tbox[i].view(-1, 4).unique(dim=0).shape[0] + # print(f'targets-unique {n1}-{n2} diff={n1-n2}') + + return tcls, tbox, indices diff --git a/utils/loss_tal.py b/utils/loss_tal.py new file mode 100644 index 0000000000000000000000000000000000000000..9f20c787b1c4ec801de946fc1cfea95c8adfd27d --- /dev/null +++ b/utils/loss_tal.py @@ -0,0 +1,215 @@ +import os + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from utils.general import xywh2xyxy +from utils.metrics import bbox_iou +from utils.tal.anchor_generator import dist2bbox, make_anchors, bbox2dist +from utils.tal.assigner import TaskAlignedAssigner +from utils.torch_utils import de_parallel + + +def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 + # return positive, negative label smoothing BCE targets + return 1.0 - 0.5 * eps, 0.5 * eps + + +class VarifocalLoss(nn.Module): + # Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367 + def __init__(self): + super().__init__() + + def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0): + weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label + with torch.cuda.amp.autocast(enabled=False): + loss = (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), + reduction="none") * weight).sum() + return loss + + +class FocalLoss(nn.Module): + # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): + super().__init__() + self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() + self.gamma = gamma + self.alpha = alpha + self.reduction = loss_fcn.reduction + self.loss_fcn.reduction = "none" # required to apply FL to each element + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + # p_t = torch.exp(-loss) + # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability + + # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py + pred_prob = torch.sigmoid(pred) # prob from logits + p_t = true * pred_prob + (1 - true) * (1 - pred_prob) + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = (1.0 - p_t) ** self.gamma + loss *= alpha_factor * modulating_factor + + if self.reduction == "mean": + return loss.mean() + elif self.reduction == "sum": + return loss.sum() + else: # 'none' + return loss + + +class BboxLoss(nn.Module): + def __init__(self, reg_max, use_dfl=False): + super().__init__() + self.reg_max = reg_max + self.use_dfl = use_dfl + + def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask): + # iou loss + bbox_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4]) # (b, h*w, 4) + pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).view(-1, 4) + target_bboxes_pos = torch.masked_select(target_bboxes, bbox_mask).view(-1, 4) + bbox_weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1) + + iou = bbox_iou(pred_bboxes_pos, target_bboxes_pos, xywh=False, CIoU=True) + loss_iou = 1.0 - iou + + loss_iou *= bbox_weight + loss_iou = loss_iou.sum() / target_scores_sum + + # dfl loss + if self.use_dfl: + dist_mask = fg_mask.unsqueeze(-1).repeat([1, 1, (self.reg_max + 1) * 4]) + pred_dist_pos = torch.masked_select(pred_dist, dist_mask).view(-1, 4, self.reg_max + 1) + target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max) + target_ltrb_pos = torch.masked_select(target_ltrb, bbox_mask).view(-1, 4) + loss_dfl = self._df_loss(pred_dist_pos, target_ltrb_pos) * bbox_weight + loss_dfl = loss_dfl.sum() / target_scores_sum + else: + loss_dfl = torch.tensor(0.0).to(pred_dist.device) + + return loss_iou, loss_dfl, iou + + def _df_loss(self, pred_dist, target): + target_left = target.to(torch.long) + target_right = target_left + 1 + weight_left = target_right.to(torch.float) - target + weight_right = 1 - weight_left + loss_left = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_left.view(-1), reduction="none").view( + target_left.shape) * weight_left + loss_right = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_right.view(-1), + reduction="none").view(target_left.shape) * weight_right + return (loss_left + loss_right).mean(-1, keepdim=True) + + +class ComputeLoss: + # Compute losses + def __init__(self, model, use_dfl=True): + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none') + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets + + # Focal loss + g = h["fl_gamma"] # focal loss gamma + if g > 0: + BCEcls = FocalLoss(BCEcls, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.BCEcls = BCEcls + self.hyp = h + self.stride = m.stride # model strides + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.no = m.no + self.reg_max = m.reg_max + self.device = device + + self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.proj = torch.arange(m.reg_max).float().to(device) # / 120.0 + self.use_dfl = use_dfl + + def preprocess(self, targets, batch_size, scale_tensor): + if targets.shape[0] == 0: + out = torch.zeros(batch_size, 0, 5, device=self.device) + else: + i = targets[:, 0] # image index + _, counts = i.unique(return_counts=True) + out = torch.zeros(batch_size, counts.max(), 5, device=self.device) + for j in range(batch_size): + matches = i == j + n = matches.sum() + if n: + out[j, :n] = targets[matches, 1:] + out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) + return out + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) + return dist2bbox(pred_dist, anchor_points, xywh=False) + + def __call__(self, p, targets, img=None, epoch=0): + loss = torch.zeros(3, device=self.device) # box, cls, dfl + feats = p[1] if isinstance(p, tuple) else p + pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + batch_size, grid_size = pred_scores.shape[:2] + imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # targets + targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + + target_labels, target_bboxes, target_scores, fg_mask = self.assigner( + pred_scores.detach().sigmoid(), + (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + + target_bboxes /= stride_tensor + target_scores_sum = max(target_scores.sum(), 1) + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[1] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + + # bbox loss + if fg_mask.sum(): + loss[0], loss[2], iou = self.bbox_loss(pred_distri, + pred_bboxes, + anchor_points, + target_bboxes, + target_scores, + target_scores_sum, + fg_mask) + + loss[0] *= 7.5 # box gain + loss[1] *= 0.5 # cls gain + loss[2] *= 1.5 # dfl gain + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) diff --git a/utils/loss_tal_dual.py b/utils/loss_tal_dual.py new file mode 100644 index 0000000000000000000000000000000000000000..259e7888d17c70e97efc31db66dfa8a2bd1faef7 --- /dev/null +++ b/utils/loss_tal_dual.py @@ -0,0 +1,385 @@ +import os + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from utils.general import xywh2xyxy +from utils.metrics import bbox_iou +from utils.tal.anchor_generator import dist2bbox, make_anchors, bbox2dist +from utils.tal.assigner import TaskAlignedAssigner +from utils.torch_utils import de_parallel + + +def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 + # return positive, negative label smoothing BCE targets + return 1.0 - 0.5 * eps, 0.5 * eps + + +class VarifocalLoss(nn.Module): + # Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367 + def __init__(self): + super().__init__() + + def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0): + weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label + with torch.cuda.amp.autocast(enabled=False): + loss = (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), + reduction="none") * weight).sum() + return loss + + +class FocalLoss(nn.Module): + # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): + super().__init__() + self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() + self.gamma = gamma + self.alpha = alpha + self.reduction = loss_fcn.reduction + self.loss_fcn.reduction = "none" # required to apply FL to each element + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + # p_t = torch.exp(-loss) + # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability + + # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py + pred_prob = torch.sigmoid(pred) # prob from logits + p_t = true * pred_prob + (1 - true) * (1 - pred_prob) + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = (1.0 - p_t) ** self.gamma + loss *= alpha_factor * modulating_factor + + if self.reduction == "mean": + return loss.mean() + elif self.reduction == "sum": + return loss.sum() + else: # 'none' + return loss + + +class BboxLoss(nn.Module): + def __init__(self, reg_max, use_dfl=False): + super().__init__() + self.reg_max = reg_max + self.use_dfl = use_dfl + + def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask): + # iou loss + bbox_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4]) # (b, h*w, 4) + pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).view(-1, 4) + target_bboxes_pos = torch.masked_select(target_bboxes, bbox_mask).view(-1, 4) + bbox_weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1) + + iou = bbox_iou(pred_bboxes_pos, target_bboxes_pos, xywh=False, CIoU=True) + loss_iou = 1.0 - iou + + loss_iou *= bbox_weight + loss_iou = loss_iou.sum() / target_scores_sum + + # dfl loss + if self.use_dfl: + dist_mask = fg_mask.unsqueeze(-1).repeat([1, 1, (self.reg_max + 1) * 4]) + pred_dist_pos = torch.masked_select(pred_dist, dist_mask).view(-1, 4, self.reg_max + 1) + target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max) + target_ltrb_pos = torch.masked_select(target_ltrb, bbox_mask).view(-1, 4) + loss_dfl = self._df_loss(pred_dist_pos, target_ltrb_pos) * bbox_weight + loss_dfl = loss_dfl.sum() / target_scores_sum + else: + loss_dfl = torch.tensor(0.0).to(pred_dist.device) + + return loss_iou, loss_dfl, iou + + def _df_loss(self, pred_dist, target): + target_left = target.to(torch.long) + target_right = target_left + 1 + weight_left = target_right.to(torch.float) - target + weight_right = 1 - weight_left + loss_left = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_left.view(-1), reduction="none").view( + target_left.shape) * weight_left + loss_right = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_right.view(-1), + reduction="none").view(target_left.shape) * weight_right + return (loss_left + loss_right).mean(-1, keepdim=True) + + +class ComputeLoss: + # Compute losses + def __init__(self, model, use_dfl=True): + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none') + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets + + # Focal loss + g = h["fl_gamma"] # focal loss gamma + if g > 0: + BCEcls = FocalLoss(BCEcls, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.BCEcls = BCEcls + self.hyp = h + self.stride = m.stride # model strides + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.no = m.no + self.reg_max = m.reg_max + self.device = device + + self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.assigner2 = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.bbox_loss2 = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.proj = torch.arange(m.reg_max).float().to(device) # / 120.0 + self.use_dfl = use_dfl + + def preprocess(self, targets, batch_size, scale_tensor): + if targets.shape[0] == 0: + out = torch.zeros(batch_size, 0, 5, device=self.device) + else: + i = targets[:, 0] # image index + _, counts = i.unique(return_counts=True) + out = torch.zeros(batch_size, counts.max(), 5, device=self.device) + for j in range(batch_size): + matches = i == j + n = matches.sum() + if n: + out[j, :n] = targets[matches, 1:] + out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) + return out + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) + return dist2bbox(pred_dist, anchor_points, xywh=False) + + def __call__(self, p, targets, img=None, epoch=0): + loss = torch.zeros(3, device=self.device) # box, cls, dfl + feats = p[1][0] if isinstance(p, tuple) else p[0] + feats2 = p[1][1] if isinstance(p, tuple) else p[1] + + pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + + pred_distri2, pred_scores2 = torch.cat([xi.view(feats2[0].shape[0], self.no, -1) for xi in feats2], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores2 = pred_scores2.permute(0, 2, 1).contiguous() + pred_distri2 = pred_distri2.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + batch_size, grid_size = pred_scores.shape[:2] + imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # targets + targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + pred_bboxes2 = self.bbox_decode(anchor_points, pred_distri2) # xyxy, (b, h*w, 4) + + target_labels, target_bboxes, target_scores, fg_mask = self.assigner( + pred_scores.detach().sigmoid(), + (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + target_labels2, target_bboxes2, target_scores2, fg_mask2 = self.assigner2( + pred_scores2.detach().sigmoid(), + (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + + target_bboxes /= stride_tensor + target_scores_sum = max(target_scores.sum(), 1) + target_bboxes2 /= stride_tensor + target_scores_sum2 = max(target_scores2.sum(), 1) + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[1] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + loss[1] *= 0.25 + loss[1] += self.BCEcls(pred_scores2, target_scores2.to(dtype)).sum() / target_scores_sum2 # BCE + + # bbox loss + if fg_mask.sum(): + loss[0], loss[2], iou = self.bbox_loss(pred_distri, + pred_bboxes, + anchor_points, + target_bboxes, + target_scores, + target_scores_sum, + fg_mask) + loss[0] *= 0.25 + loss[2] *= 0.25 + if fg_mask2.sum(): + loss0_, loss2_, iou2 = self.bbox_loss2(pred_distri2, + pred_bboxes2, + anchor_points, + target_bboxes2, + target_scores2, + target_scores_sum2, + fg_mask2) + loss[0] += loss0_ + loss[2] += loss2_ + + loss[0] *= 7.5 # box gain + loss[1] *= 0.5 # cls gain + loss[2] *= 1.5 # dfl gain + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) + + +class ComputeLossLH: + # Compute losses + def __init__(self, model, use_dfl=True): + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none') + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets + + # Focal loss + g = h["fl_gamma"] # focal loss gamma + if g > 0: + BCEcls = FocalLoss(BCEcls, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.BCEcls = BCEcls + self.hyp = h + self.stride = m.stride # model strides + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.no = m.no + self.reg_max = m.reg_max + self.device = device + + self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.proj = torch.arange(m.reg_max).float().to(device) # / 120.0 + self.use_dfl = use_dfl + + def preprocess(self, targets, batch_size, scale_tensor): + if targets.shape[0] == 0: + out = torch.zeros(batch_size, 0, 5, device=self.device) + else: + i = targets[:, 0] # image index + _, counts = i.unique(return_counts=True) + out = torch.zeros(batch_size, counts.max(), 5, device=self.device) + for j in range(batch_size): + matches = i == j + n = matches.sum() + if n: + out[j, :n] = targets[matches, 1:] + out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) + return out + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) + return dist2bbox(pred_dist, anchor_points, xywh=False) + + def __call__(self, p, targets, img=None, epoch=0): + loss = torch.zeros(3, device=self.device) # box, cls, dfl + feats = p[1][0] if isinstance(p, tuple) else p[0] + feats2 = p[1][1] if isinstance(p, tuple) else p[1] + + pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + + pred_distri2, pred_scores2 = torch.cat([xi.view(feats2[0].shape[0], self.no, -1) for xi in feats2], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores2 = pred_scores2.permute(0, 2, 1).contiguous() + pred_distri2 = pred_distri2.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + batch_size, grid_size = pred_scores.shape[:2] + imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # targets + targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + pred_bboxes2 = self.bbox_decode(anchor_points, pred_distri2) # xyxy, (b, h*w, 4) + + target_labels, target_bboxes, target_scores, fg_mask = self.assigner( + pred_scores2.detach().sigmoid(), + (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + + target_bboxes /= stride_tensor + target_scores_sum = target_scores.sum() + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[1] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + loss[1] *= 0.25 + loss[1] += self.BCEcls(pred_scores2, target_scores.to(dtype)).sum() / target_scores_sum # BCE + + # bbox loss + if fg_mask.sum(): + loss[0], loss[2], iou = self.bbox_loss(pred_distri, + pred_bboxes, + anchor_points, + target_bboxes, + target_scores, + target_scores_sum, + fg_mask) + loss[0] *= 0.25 + loss[2] *= 0.25 + if fg_mask.sum(): + loss0_, loss2_, iou2 = self.bbox_loss(pred_distri2, + pred_bboxes2, + anchor_points, + target_bboxes, + target_scores, + target_scores_sum, + fg_mask) + loss[0] += loss0_ + loss[2] += loss2_ + + loss[0] *= 7.5 # box gain + loss[1] *= 0.5 # cls gain + loss[2] *= 1.5 # dfl gain + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) diff --git a/utils/loss_tal_triple.py b/utils/loss_tal_triple.py new file mode 100644 index 0000000000000000000000000000000000000000..1ed821983e0a959dfff70575adacb2aed8ae6a5c --- /dev/null +++ b/utils/loss_tal_triple.py @@ -0,0 +1,282 @@ +import os + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from utils.general import xywh2xyxy +from utils.metrics import bbox_iou +from utils.tal.anchor_generator import dist2bbox, make_anchors, bbox2dist +from utils.tal.assigner import TaskAlignedAssigner +from utils.torch_utils import de_parallel + + +def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 + # return positive, negative label smoothing BCE targets + return 1.0 - 0.5 * eps, 0.5 * eps + + +class VarifocalLoss(nn.Module): + # Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367 + def __init__(self): + super().__init__() + + def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0): + weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label + with torch.cuda.amp.autocast(enabled=False): + loss = (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), + reduction="none") * weight).sum() + return loss + + +class FocalLoss(nn.Module): + # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): + super().__init__() + self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() + self.gamma = gamma + self.alpha = alpha + self.reduction = loss_fcn.reduction + self.loss_fcn.reduction = "none" # required to apply FL to each element + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + # p_t = torch.exp(-loss) + # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability + + # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py + pred_prob = torch.sigmoid(pred) # prob from logits + p_t = true * pred_prob + (1 - true) * (1 - pred_prob) + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = (1.0 - p_t) ** self.gamma + loss *= alpha_factor * modulating_factor + + if self.reduction == "mean": + return loss.mean() + elif self.reduction == "sum": + return loss.sum() + else: # 'none' + return loss + + +class BboxLoss(nn.Module): + def __init__(self, reg_max, use_dfl=False): + super().__init__() + self.reg_max = reg_max + self.use_dfl = use_dfl + + def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask): + # iou loss + bbox_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4]) # (b, h*w, 4) + pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).view(-1, 4) + target_bboxes_pos = torch.masked_select(target_bboxes, bbox_mask).view(-1, 4) + bbox_weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1) + + iou = bbox_iou(pred_bboxes_pos, target_bboxes_pos, xywh=False, CIoU=True) + loss_iou = 1.0 - iou + + loss_iou *= bbox_weight + loss_iou = loss_iou.sum() / target_scores_sum + + # dfl loss + if self.use_dfl: + dist_mask = fg_mask.unsqueeze(-1).repeat([1, 1, (self.reg_max + 1) * 4]) + pred_dist_pos = torch.masked_select(pred_dist, dist_mask).view(-1, 4, self.reg_max + 1) + target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max) + target_ltrb_pos = torch.masked_select(target_ltrb, bbox_mask).view(-1, 4) + loss_dfl = self._df_loss(pred_dist_pos, target_ltrb_pos) * bbox_weight + loss_dfl = loss_dfl.sum() / target_scores_sum + else: + loss_dfl = torch.tensor(0.0).to(pred_dist.device) + + return loss_iou, loss_dfl, iou + + def _df_loss(self, pred_dist, target): + target_left = target.to(torch.long) + target_right = target_left + 1 + weight_left = target_right.to(torch.float) - target + weight_right = 1 - weight_left + loss_left = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_left.view(-1), reduction="none").view( + target_left.shape) * weight_left + loss_right = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_right.view(-1), + reduction="none").view(target_left.shape) * weight_right + return (loss_left + loss_right).mean(-1, keepdim=True) + + +class ComputeLoss: + # Compute losses + def __init__(self, model, use_dfl=True): + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none') + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets + + # Focal loss + g = h["fl_gamma"] # focal loss gamma + if g > 0: + BCEcls = FocalLoss(BCEcls, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.BCEcls = BCEcls + self.hyp = h + self.stride = m.stride # model strides + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.no = m.no + self.reg_max = m.reg_max + self.device = device + + self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.assigner2 = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.assigner3 = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.bbox_loss2 = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.bbox_loss3 = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.proj = torch.arange(m.reg_max).float().to(device) # / 120.0 + self.use_dfl = use_dfl + + def preprocess(self, targets, batch_size, scale_tensor): + if targets.shape[0] == 0: + out = torch.zeros(batch_size, 0, 5, device=self.device) + else: + i = targets[:, 0] # image index + _, counts = i.unique(return_counts=True) + out = torch.zeros(batch_size, counts.max(), 5, device=self.device) + for j in range(batch_size): + matches = i == j + n = matches.sum() + if n: + out[j, :n] = targets[matches, 1:] + out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) + return out + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) + return dist2bbox(pred_dist, anchor_points, xywh=False) + + def __call__(self, p, targets, img=None, epoch=0): + loss = torch.zeros(3, device=self.device) # box, cls, dfl + feats = p[1][0] if isinstance(p, tuple) else p[0] + feats2 = p[1][1] if isinstance(p, tuple) else p[1] + feats3 = p[1][2] if isinstance(p, tuple) else p[2] + + pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + + pred_distri2, pred_scores2 = torch.cat([xi.view(feats2[0].shape[0], self.no, -1) for xi in feats2], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores2 = pred_scores2.permute(0, 2, 1).contiguous() + pred_distri2 = pred_distri2.permute(0, 2, 1).contiguous() + + pred_distri3, pred_scores3 = torch.cat([xi.view(feats3[0].shape[0], self.no, -1) for xi in feats3], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores3 = pred_scores3.permute(0, 2, 1).contiguous() + pred_distri3 = pred_distri3.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + batch_size, grid_size = pred_scores.shape[:2] + imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # targets + targets = self.preprocess(targets, batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + pred_bboxes2 = self.bbox_decode(anchor_points, pred_distri2) # xyxy, (b, h*w, 4) + pred_bboxes3 = self.bbox_decode(anchor_points, pred_distri3) # xyxy, (b, h*w, 4) + + target_labels, target_bboxes, target_scores, fg_mask = self.assigner( + pred_scores.detach().sigmoid(), + (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + target_labels2, target_bboxes2, target_scores2, fg_mask2 = self.assigner2( + pred_scores2.detach().sigmoid(), + (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + target_labels3, target_bboxes3, target_scores3, fg_mask3 = self.assigner3( + pred_scores3.detach().sigmoid(), + (pred_bboxes3.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + + target_bboxes /= stride_tensor + target_scores_sum = max(target_scores.sum(), 1) + target_bboxes2 /= stride_tensor + target_scores_sum2 = max(target_scores2.sum(), 1) + target_bboxes3 /= stride_tensor + target_scores_sum3 = max(target_scores3.sum(), 1) + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[1] = 0.25 * self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + loss[1] += 0.25 * self.BCEcls(pred_scores2, target_scores2.to(dtype)).sum() / target_scores_sum2 # BCE + loss[1] += self.BCEcls(pred_scores3, target_scores3.to(dtype)).sum() / target_scores_sum3 # BCE + + # bbox loss + if fg_mask.sum(): + loss[0], loss[2], iou = self.bbox_loss(pred_distri, + pred_bboxes, + anchor_points, + target_bboxes, + target_scores, + target_scores_sum, + fg_mask) + loss[0] *= 0.25 + loss[2] *= 0.25 + if fg_mask2.sum(): + loss0_, loss2_, iou2 = self.bbox_loss2(pred_distri2, + pred_bboxes2, + anchor_points, + target_bboxes2, + target_scores2, + target_scores_sum2, + fg_mask2) + loss[0] += 0.25 * loss0_ + loss[2] += 0.25 * loss2_ + if fg_mask3.sum(): + loss0__, loss2__, iou3 = self.bbox_loss3(pred_distri3, + pred_bboxes3, + anchor_points, + target_bboxes3, + target_scores3, + target_scores_sum3, + fg_mask3) + loss[0] += loss0__ + loss[2] += loss2__ + + loss[0] *= 7.5 # box gain + loss[1] *= 0.5 # cls gain + loss[2] *= 1.5 # dfl gain + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) diff --git a/utils/metrics.py b/utils/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..1229f2b10d9754ec82c71881ddc54e8b0313161a --- /dev/null +++ b/utils/metrics.py @@ -0,0 +1,397 @@ +import math +import warnings +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import torch + +from utils import TryExcept, threaded + + +def fitness(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] + return (x[:, :4] * w).sum(1) + + +def smooth(y, f=0.05): + # Box filter of fraction f + nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd) + p = np.ones(nf // 2) # ones padding + yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded + return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed + + +def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=""): + """ Compute the average precision, given the recall and precision curves. + Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. + # Arguments + tp: True positives (nparray, nx1 or nx10). + conf: Objectness value from 0-1 (nparray). + pred_cls: Predicted object classes (nparray). + target_cls: True object classes (nparray). + plot: Plot precision-recall curve at mAP@0.5 + save_dir: Plot save directory + # Returns + The average precision as computed in py-faster-rcnn. + """ + + # Sort by objectness + i = np.argsort(-conf) + tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] + + # Find unique classes + unique_classes, nt = np.unique(target_cls, return_counts=True) + nc = unique_classes.shape[0] # number of classes, number of detections + + # Create Precision-Recall curve and compute AP for each class + px, py = np.linspace(0, 1, 1000), [] # for plotting + ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) + for ci, c in enumerate(unique_classes): + i = pred_cls == c + n_l = nt[ci] # number of labels + n_p = i.sum() # number of predictions + if n_p == 0 or n_l == 0: + continue + + # Accumulate FPs and TPs + fpc = (1 - tp[i]).cumsum(0) + tpc = tp[i].cumsum(0) + + # Recall + recall = tpc / (n_l + eps) # recall curve + r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases + + # Precision + precision = tpc / (tpc + fpc) # precision curve + p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score + + # AP from recall-precision curve + for j in range(tp.shape[1]): + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) + if plot and j == 0: + py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + + # Compute F1 (harmonic mean of precision and recall) + f1 = 2 * p * r / (p + r + eps) + names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data + names = dict(enumerate(names)) # to dict + if plot: + plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names) + plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1') + plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision') + plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall') + + i = smooth(f1.mean(0), 0.1).argmax() # max F1 index + p, r, f1 = p[:, i], r[:, i], f1[:, i] + tp = (r * nt).round() # true positives + fp = (tp / (p + eps) - tp).round() # false positives + return tp, fp, p, r, f1, ap, unique_classes.astype(int) + + +def compute_ap(recall, precision): + """ Compute the average precision, given the recall and precision curves + # Arguments + recall: The recall curve (list) + precision: The precision curve (list) + # Returns + Average precision, precision curve, recall curve + """ + + # Append sentinel values to beginning and end + mrec = np.concatenate(([0.0], recall, [1.0])) + mpre = np.concatenate(([1.0], precision, [0.0])) + + # Compute the precision envelope + mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) + + # Integrate area under curve + method = 'interp' # methods: 'continuous', 'interp' + if method == 'interp': + x = np.linspace(0, 1, 101) # 101-point interp (COCO) + ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate + else: # 'continuous' + i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes + ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve + + return ap, mpre, mrec + + +class ConfusionMatrix: + # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix + def __init__(self, nc, conf=0.25, iou_thres=0.45): + self.matrix = np.zeros((nc + 1, nc + 1)) + self.nc = nc # number of classes + self.conf = conf + self.iou_thres = iou_thres + + def process_batch(self, detections, labels): + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + detections (Array[N, 6]), x1, y1, x2, y2, conf, class + labels (Array[M, 5]), class, x1, y1, x2, y2 + Returns: + None, updates confusion matrix accordingly + """ + if detections is None: + gt_classes = labels.int() + for gc in gt_classes: + self.matrix[self.nc, gc] += 1 # background FN + return + + detections = detections[detections[:, 4] > self.conf] + gt_classes = labels[:, 0].int() + detection_classes = detections[:, 5].int() + iou = box_iou(labels[:, 1:], detections[:, :4]) + + x = torch.where(iou > self.iou_thres) + if x[0].shape[0]: + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + else: + matches = np.zeros((0, 3)) + + n = matches.shape[0] > 0 + m0, m1, _ = matches.transpose().astype(int) + for i, gc in enumerate(gt_classes): + j = m0 == i + if n and sum(j) == 1: + self.matrix[detection_classes[m1[j]], gc] += 1 # correct + else: + self.matrix[self.nc, gc] += 1 # true background + + if n: + for i, dc in enumerate(detection_classes): + if not any(m1 == i): + self.matrix[dc, self.nc] += 1 # predicted background + + def matrix(self): + return self.matrix + + def tp_fp(self): + tp = self.matrix.diagonal() # true positives + fp = self.matrix.sum(1) - tp # false positives + # fn = self.matrix.sum(0) - tp # false negatives (missed detections) + return tp[:-1], fp[:-1] # remove background class + + @TryExcept('WARNING ⚠️ ConfusionMatrix plot failure') + def plot(self, normalize=True, save_dir='', names=()): + import seaborn as sn + + array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1E-9) if normalize else 1) # normalize columns + array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) + + fig, ax = plt.subplots(1, 1, figsize=(12, 9), tight_layout=True) + nc, nn = self.nc, len(names) # number of classes, names + sn.set(font_scale=1.0 if nc < 50 else 0.8) # for label size + labels = (0 < nn < 99) and (nn == nc) # apply names to ticklabels + ticklabels = (names + ['background']) if labels else "auto" + with warnings.catch_warnings(): + warnings.simplefilter('ignore') # suppress empty matrix RuntimeWarning: All-NaN slice encountered + sn.heatmap(array, + ax=ax, + annot=nc < 30, + annot_kws={ + "size": 8}, + cmap='Blues', + fmt='.2f', + square=True, + vmin=0.0, + xticklabels=ticklabels, + yticklabels=ticklabels).set_facecolor((1, 1, 1)) + ax.set_ylabel('True') + ax.set_ylabel('Predicted') + ax.set_title('Confusion Matrix') + fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) + plt.close(fig) + + def print(self): + for i in range(self.nc + 1): + print(' '.join(map(str, self.matrix[i]))) + + +class WIoU_Scale: + ''' monotonous: { + None: origin v1 + True: monotonic FM v2 + False: non-monotonic FM v3 + } + momentum: The momentum of running mean''' + + iou_mean = 1. + monotonous = False + _momentum = 1 - 0.5 ** (1 / 7000) + _is_train = True + + def __init__(self, iou): + self.iou = iou + self._update(self) + + @classmethod + def _update(cls, self): + if cls._is_train: cls.iou_mean = (1 - cls._momentum) * cls.iou_mean + \ + cls._momentum * self.iou.detach().mean().item() + + @classmethod + def _scaled_loss(cls, self, gamma=1.9, delta=3): + if isinstance(self.monotonous, bool): + if self.monotonous: + return (self.iou.detach() / self.iou_mean).sqrt() + else: + beta = self.iou.detach() / self.iou_mean + alpha = delta * torch.pow(gamma, beta - delta) + return beta / alpha + return 1 + + +def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, MDPIoU=False, feat_h=640, feat_w=640, eps=1e-7): + # Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4) + + # Get the coordinates of bounding boxes + if xywh: # transform from xywh to xyxy + (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1) + w1_, h1_, w2_, h2_ = w1 / 2, h1 / 2, w2 / 2, h2 / 2 + b1_x1, b1_x2, b1_y1, b1_y2 = x1 - w1_, x1 + w1_, y1 - h1_, y1 + h1_ + b2_x1, b2_x2, b2_y1, b2_y2 = x2 - w2_, x2 + w2_, y2 - h2_, y2 + h2_ + else: # x1, y1, x2, y2 = box1 + b1_x1, b1_y1, b1_x2, b1_y2 = box1.chunk(4, -1) + b2_x1, b2_y1, b2_x2, b2_y2 = box2.chunk(4, -1) + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + + # Intersection area + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ + (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) + + # Union Area + union = w1 * h1 + w2 * h2 - inter + eps + + # IoU + iou = inter / union + if CIoU or DIoU or GIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width + ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height + if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 + c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center dist ** 2 + if CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + with torch.no_grad(): + alpha = v / (v - iou + (1 + eps)) + return iou - (rho2 / c2 + v * alpha) # CIoU + return iou - rho2 / c2 # DIoU + c_area = cw * ch + eps # convex area + return iou - (c_area - union) / c_area # GIoU https://arxiv.org/pdf/1902.09630.pdf + elif MDPIoU: + d1 = (b2_x1 - b1_x1) ** 2 + (b2_y1 - b1_y1) ** 2 + d2 = (b2_x2 - b1_x2) ** 2 + (b2_y2 - b1_y2) ** 2 + mpdiou_hw_pow = feat_h ** 2 + feat_w ** 2 + return iou - d1 / mpdiou_hw_pow - d2 / mpdiou_hw_pow # MPDIoU + return iou # IoU + + +def box_iou(box1, box2, eps=1e-7): + # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py + """ + Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: + box1 (Tensor[N, 4]) + box2 (Tensor[M, 4]) + Returns: + iou (Tensor[N, M]): the NxM matrix containing the pairwise + IoU values for every element in boxes1 and boxes2 + """ + + # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) + (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2) + inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) + + # IoU = inter / (area1 + area2 - inter) + return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps) + + +def bbox_ioa(box1, box2, eps=1e-7): + """Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2 + box1: np.array of shape(nx4) + box2: np.array of shape(mx4) + returns: np.array of shape(nxm) + """ + + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = box1.T + b2_x1, b2_y1, b2_x2, b2_y2 = box2.T + + # Intersection area + inter_area = (np.minimum(b1_x2[:, None], b2_x2) - np.maximum(b1_x1[:, None], b2_x1)).clip(0) * \ + (np.minimum(b1_y2[:, None], b2_y2) - np.maximum(b1_y1[:, None], b2_y1)).clip(0) + + # box2 area + box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps + + # Intersection over box2 area + return inter_area / box2_area + + +def wh_iou(wh1, wh2, eps=1e-7): + # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2 + wh1 = wh1[:, None] # [N,1,2] + wh2 = wh2[None] # [1,M,2] + inter = torch.min(wh1, wh2).prod(2) # [N,M] + return inter / (wh1.prod(2) + wh2.prod(2) - inter + eps) # iou = inter / (area1 + area2 - inter) + + +# Plots ---------------------------------------------------------------------------------------------------------------- + + +@threaded +def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()): + # Precision-recall curve + fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) + py = np.stack(py, axis=1) + + if 0 < len(names) < 21: # display per-class legend if < 21 classes + for i, y in enumerate(py.T): + ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) + else: + ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) + + ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) + ax.set_xlabel('Recall') + ax.set_ylabel('Precision') + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + ax.set_title('Precision-Recall Curve') + fig.savefig(save_dir, dpi=250) + plt.close(fig) + + +@threaded +def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabel='Confidence', ylabel='Metric'): + # Metric-confidence curve + fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) + + if 0 < len(names) < 21: # display per-class legend if < 21 classes + for i, y in enumerate(py): + ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) + else: + ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) + + y = smooth(py.mean(0), 0.05) + ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_xlim(0, 1) + ax.set_ylim(0, 1) + ax.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + ax.set_title(f'{ylabel}-Confidence Curve') + fig.savefig(save_dir, dpi=250) + plt.close(fig) diff --git a/utils/panoptic/__init__.py b/utils/panoptic/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..84952a8167bc2975913a6def6b4f027d566552a9 --- /dev/null +++ b/utils/panoptic/__init__.py @@ -0,0 +1 @@ +# init \ No newline at end of file diff --git a/utils/panoptic/augmentations.py b/utils/panoptic/augmentations.py new file mode 100644 index 0000000000000000000000000000000000000000..8e0a95cb840a4101bde8daa8248cddcbd10ac7c3 --- /dev/null +++ b/utils/panoptic/augmentations.py @@ -0,0 +1,183 @@ +import math +import random + +import cv2 +import numpy as np + +from ..augmentations import box_candidates +from ..general import resample_segments, segment2box +from ..metrics import bbox_ioa + + +def mixup(im, labels, segments, seg_cls, semantic_masks, im2, labels2, segments2, seg_cls2, semantic_masks2): + # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf + r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 + im = (im * r + im2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + segments = np.concatenate((segments, segments2), 0) + seg_cls = np.concatenate((seg_cls, seg_cls2), 0) + semantic_masks = np.concatenate((semantic_masks, semantic_masks2), 0) + return im, labels, segments, seg_cls, semantic_masks + + +def random_perspective(im, + targets=(), + segments=(), + semantic_masks = (), + degrees=10, + translate=.1, + scale=.1, + shear=10, + perspective=0.0, + border=(0, 0)): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) + # targets = [cls, xyxy] + + height = im.shape[0] + border[0] * 2 # shape(h,w,c) + width = im.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -im.shape[1] / 2 # x translation (pixels) + C[1, 2] = -im.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - scale, 1 + scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) + T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) + else: # affine + im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(im[:, :, ::-1]) # base + # ax[1].imshow(im2[:, :, ::-1]) # warped + + # Transform label coordinates + n = len(targets) + new_segments = [] + new_semantic_masks = [] + if n: + new = np.zeros((n, 4)) + segments = resample_segments(segments) # upsample + for i, segment in enumerate(segments): + xy = np.ones((len(segment), 3)) + xy[:, :2] = segment + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine + + # clip + new[i] = segment2box(xy, width, height) + new_segments.append(xy) + + semantic_masks = resample_segments(semantic_masks) + for i, semantic_mask in enumerate(semantic_masks): + #if i < n: + # xy = np.ones((len(segments[i]), 3)) + # xy[:, :2] = segments[i] + # xy = xy @ M.T # transform + # xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine + + # new[i] = segment2box(xy, width, height) + # new_segments.append(xy) + + xy_s = np.ones((len(semantic_mask), 3)) + xy_s[:, :2] = semantic_mask + xy_s = xy_s @ M.T # transform + xy_s = (xy_s[:, :2] / xy_s[:, 2:3] if perspective else xy_s[:, :2]) # perspective rescale or affine + + new_semantic_masks.append(xy_s) + + # filter candidates + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01) + targets = targets[i] + targets[:, 1:5] = new[i] + new_segments = np.array(new_segments)[i] + new_semantic_masks = np.array(new_semantic_masks) + + return im, targets, new_segments, new_semantic_masks + + +def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): + # Resize and pad image while meeting stride-multiple constraints + shape = im.shape[:2] # current shape [height, width] + if isinstance(new_shape, int): + new_shape = (new_shape, new_shape) + + # Scale ratio (new / old) + r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) + if not scaleup: # only scale down, do not scale up (for better val mAP) + r = min(r, 1.0) + + # Compute padding + ratio = r, r # width, height ratios + new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + if auto: # minimum rectangle + dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding + elif scaleFill: # stretch + dw, dh = 0.0, 0.0 + new_unpad = (new_shape[1], new_shape[0]) + ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + + dw /= 2 # divide padding into 2 sides + dh /= 2 + + if shape[::-1] != new_unpad: # resize + im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR) + top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) + left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) + im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + return im, ratio, (dw, dh) + + +def copy_paste(im, labels, segments, seg_cls, semantic_masks, p=0.5): + # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy) + n = len(segments) + if p and n: + h, w, _ = im.shape # height, width, channels + im_new = np.zeros(im.shape, np.uint8) + + # calculate ioa first then select indexes randomly + boxes = np.stack([w - labels[:, 3], labels[:, 2], w - labels[:, 1], labels[:, 4]], axis=-1) # (n, 4) + ioa = bbox_ioa(boxes, labels[:, 1:5]) # intersection over area + indexes = np.nonzero((ioa < 0.30).all(1))[0] # (N, ) + n = len(indexes) + for j in random.sample(list(indexes), k=round(p * n)): + l, box, s = labels[j], boxes[j], segments[j] + labels = np.concatenate((labels, [[l[0], *box]]), 0) + segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) + seg_cls.append(l[0].astype(int)) + semantic_masks.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) + cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED) + + result = cv2.flip(im, 1) # augment segments (flip left-right) + i = cv2.flip(im_new, 1).astype(bool) + im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug + + return im, labels, segments, seg_cls, semantic_masks \ No newline at end of file diff --git a/utils/panoptic/dataloaders.py b/utils/panoptic/dataloaders.py new file mode 100644 index 0000000000000000000000000000000000000000..6f2b1d72bb05c02c482a0eac52d3aae4d698b57f --- /dev/null +++ b/utils/panoptic/dataloaders.py @@ -0,0 +1,478 @@ +import os +import random + +import pickle +from pathlib import Path + +from itertools import repeat +from multiprocessing.pool import Pool, ThreadPool + +import cv2 +import numpy as np +import torch +from torch.utils.data import DataLoader, distributed +from tqdm import tqdm + +from ..augmentations import augment_hsv +from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker, get_hash, verify_image_label, HELP_URL, TQDM_BAR_FORMAT, LOCAL_RANK +from ..general import NUM_THREADS, LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn +from ..torch_utils import torch_distributed_zero_first +from ..coco_utils import annToMask, getCocoIds +from .augmentations import mixup, random_perspective, copy_paste, letterbox + +RANK = int(os.getenv('RANK', -1)) + + +def create_dataloader(path, + imgsz, + batch_size, + stride, + single_cls=False, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + image_weights=False, + close_mosaic=False, + quad=False, + prefix='', + shuffle=False, + mask_downsample_ratio=1, + overlap_mask=False): + if rect and shuffle: + LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False') + shuffle = False + with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP + dataset = LoadImagesAndLabelsAndMasks( + path, + imgsz, + batch_size, + augment=augment, # augmentation + hyp=hyp, # hyperparameters + rect=rect, # rectangular batches + cache_images=cache, + single_cls=single_cls, + stride=int(stride), + pad=pad, + image_weights=image_weights, + prefix=prefix, + downsample_ratio=mask_downsample_ratio, + overlap=overlap_mask) + + batch_size = min(batch_size, len(dataset)) + nd = torch.cuda.device_count() # number of CUDA devices + nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) + #loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates + loader = DataLoader if image_weights or close_mosaic else InfiniteDataLoader + generator = torch.Generator() + generator.manual_seed(6148914691236517205 + RANK) + return loader( + dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, + worker_init_fn=seed_worker, + generator=generator, + ), dataset + +def img2stuff_paths(img_paths): + # Define label paths as a function of image paths + sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}stuff{os.sep}' # /images/, /segmentations/ substrings + return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths] + + +class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing + + def __init__( + self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0, + min_items=0, + prefix="", + downsample_ratio=1, + overlap=False, + ): + super().__init__( + path, + img_size, + batch_size, + augment, + hyp, + rect, + image_weights, + cache_images, + single_cls, + stride, + pad, + min_items, + prefix) + self.downsample_ratio = downsample_ratio + self.overlap = overlap + + # semantic segmentation + self.coco_ids = getCocoIds() + + # Check cache + self.seg_files = img2stuff_paths(self.im_files) # labels + p = Path(path) + cache_path = (p.with_suffix('') if p.is_file() else Path(self.seg_files[0]).parent) + cache_path = Path(str(cache_path) + '_stuff').with_suffix('.cache') + try: + cache, exists = np.load(cache_path, allow_pickle = True).item(), True # load dict + #assert cache['version'] == self.cache_version # matches current version + #assert cache['hash'] == get_hash(self.seg_files + self.im_files) # identical hash + except Exception: + cache, exists = self.cache_seg_labels(cache_path, prefix), False # run cache ops + + # Display cache + nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupt, total + if exists and LOCAL_RANK in {-1, 0}: + d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupt" + tqdm(None, desc = (prefix + d), total = n, initial = n, bar_format = TQDM_BAR_FORMAT) # display cache results + if cache['msgs']: + LOGGER.info('\n'.join(cache['msgs'])) # display warnings + assert (0 < nf) or (not augment), f'{prefix}No labels found in {cache_path}, can not start training. {HELP_URL}' + + # Read cache + [cache.pop(k) for k in ('hash', 'version', 'msgs')] # remove items + seg_labels, _, self.semantic_masks = zip(*cache.values()) + nl = len(np.concatenate(seg_labels, 0)) # number of labels + assert nl > 0 or not augment, f'{prefix}All labels empty in {cache_path}, can not start training. {HELP_URL}' + + # Update labels + self.seg_cls = [] + include_class = [] # filter labels to include only these classes (optional) + include_class_array = np.array(include_class).reshape(1, -1) + for i, (label, semantic_masks) in enumerate(zip(seg_labels, self.semantic_masks)): + self.seg_cls.append((label[:, 0].astype(int)).tolist()) + if include_class: + j = (label[:, 0:1] == include_class_array).any(1) + if semantic_masks: + self.semantic_masks[i] = semantic_masks[j] + if single_cls: # single-class training, merge all classes into 0 + if semantic_masks: + self.semantic_masks[i][:, 0] = 0 + + def __getitem__(self, index): + index = self.indices[index] # linear, shuffled, or image_weights + + hyp = self.hyp + mosaic = self.mosaic and random.random() < hyp['mosaic'] + masks = [] + if mosaic: + # Load mosaic + img, labels, segments, seg_cls, semantic_masks = self.load_mosaic(index) + shapes = None + + # MixUp augmentation + if random.random() < hyp["mixup"]: + img, labels, segments, seg_cls, semantic_masks = mixup(img, labels, segments, seg_cls, semantic_masks, + *self.load_mosaic(random.randint(0, self.n - 1))) + + else: + # Load image + img, (h0, w0), (h, w) = self.load_image(index) + + # Letterbox + shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + labels = self.labels[index].copy() + # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy + segments = self.segments[index].copy() + if len(segments): + for i_s in range(len(segments)): + segments[i_s] = xyn2xy( + segments[i_s], + ratio[0] * w, + ratio[1] * h, + padw=pad[0], + padh=pad[1], + ) + + seg_cls = self.seg_cls[index].copy() + semantic_masks = self.semantic_masks[index].copy() + #semantic_masks = [xyn2xy(x, ratio[0] * w, ratio[1] * h, padw = pad[0], padh = pad[1]) for x in semantic_masks] + if len(semantic_masks): + for ss in range(len(semantic_masks)): + semantic_masks[ss] = xyn2xy( + semantic_masks[ss], + ratio[0] * w, + ratio[1] * h, + padw = pad[0], + padh = pad[1], + ) + + if labels.size: # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) + + if self.augment: + img, labels, segments, semantic_masks = random_perspective( + img, + labels, + segments=segments, + semantic_masks = semantic_masks, + degrees=hyp["degrees"], + translate=hyp["translate"], + scale=hyp["scale"], + shear=hyp["shear"], + perspective=hyp["perspective"]) + + nl = len(labels) # number of labels + if nl: + labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) + if self.overlap: + masks, sorted_idx = polygons2masks_overlap(img.shape[:2], + segments, + downsample_ratio=self.downsample_ratio) + masks = masks[None] # (640, 640) -> (1, 640, 640) + labels = labels[sorted_idx] + else: + masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) + + masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] // + self.downsample_ratio, img.shape[1] // + self.downsample_ratio)) + semantic_masks = polygons2masks(img.shape[:2], semantic_masks, color = 1, downsample_ratio=self.downsample_ratio) + #semantic_masks = polygons2masks(img.shape[:2], semantic_masks, color = 1, downsample_ratio=1) + semantic_masks = torch.from_numpy(semantic_masks) + # TODO: albumentations support + if self.augment: + # Albumentations + # there are some augmentation that won't change boxes and masks, + # so just be it for now. + img, labels = self.albumentations(img, labels) + nl = len(labels) # update after albumentations + ns = len(semantic_masks) + + # HSV color-space + augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) + + # Flip up-down + if random.random() < hyp["flipud"]: + img = np.flipud(img) + if nl: + labels[:, 2] = 1 - labels[:, 2] + masks = torch.flip(masks, dims=[1]) + if ns: + semantic_masks = torch.flip(semantic_masks, dims = [1]) + + # Flip left-right + if random.random() < hyp["fliplr"]: + img = np.fliplr(img) + if nl: + labels[:, 1] = 1 - labels[:, 1] + masks = torch.flip(masks, dims=[2]) + if ns: + semantic_masks = torch.flip(semantic_masks, dims = [2]) + + # Cutouts # labels = cutout(img, labels, p=0.5) + + labels_out = torch.zeros((nl, 6)) + if nl: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Combine semantic masks + semantic_seg_masks = torch.zeros((len(self.coco_ids), img.shape[0] // self.downsample_ratio, + img.shape[1] // self.downsample_ratio), dtype = torch.uint8) + #semantic_seg_masks = torch.zeros((len(self.coco_ids), img.shape[0], img.shape[1]), dtype = torch.uint8) + for cls_id, semantic_mask in zip(seg_cls, semantic_masks): + semantic_seg_masks[cls_id] = (semantic_seg_masks[cls_id].logical_or(semantic_mask)).int() + + + # Convert + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks, semantic_seg_masks) + + def load_mosaic(self, index): + # YOLO 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic + labels4, segments4, seg_cls, semantic_masks4 = [], [], [], [] + s = self.img_size + yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y + + # 3 additional image indices + indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = self.load_image(index) + + # place img in img4 + if i == 0: # top left + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + labels, segments, semantic_masks = self.labels[index].copy(), self.segments[index].copy(), self.semantic_masks[index].copy() + + if labels.size: + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padw, padh) for x in segments] + semantic_masks = [xyn2xy(x, w, h, padw, padh) for x in semantic_masks] + labels4.append(labels) + segments4.extend(segments) + seg_cls.extend(self.seg_cls[index].copy()) + semantic_masks4.extend(semantic_masks) + + # Concat/clip labels + labels4 = np.concatenate(labels4, 0) + for i in range(len(semantic_masks4)): + if i < len(segments4): + np.clip(labels4[:, 1:][i], 0, 2 * s, out = labels4[:, 1:][i]) + np.clip(segments4[i], 0, 2 * s, out = segments4[i]) + np.clip(semantic_masks4[i], 0, 2 * s, out = semantic_masks4[i]) + # img4, labels4 = replicate(img4, labels4) # replicate + + # 3 additional image indices + # Augment + img4, labels4, segments4, seg_cls, semantic_masks4 = copy_paste(img4, labels4, segments4, seg_cls, semantic_masks4, p=self.hyp["copy_paste"]) + img4, labels4, segments4, semantic_masks4 = random_perspective(img4, + labels4, + segments4, + semantic_masks4, + degrees=self.hyp["degrees"], + translate=self.hyp["translate"], + scale=self.hyp["scale"], + shear=self.hyp["shear"], + perspective=self.hyp["perspective"], + border=self.mosaic_border) # border to remove + + return img4, labels4, segments4, seg_cls, semantic_masks4 + + def cache_seg_labels(self, path = Path('./labels_stuff.cache'), prefix = ''): + # Cache dataset labels, check images and read shapes + x = {} # dict + nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages + desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..." + with Pool(NUM_THREADS) as pool: + pbar = tqdm(pool.imap(verify_image_label, zip(self.im_files, self.seg_files, repeat(prefix))), + desc = desc, + total = len(self.im_files), + bar_format = TQDM_BAR_FORMAT) + for im_file, lb, shape, segments, nm_f, nf_f, ne_f, nc_f, msg in pbar: + nm += nm_f + nf += nf_f + ne += ne_f + nc += nc_f + if im_file: + x[im_file] = [lb, shape, segments] + if msg: + msgs.append(msg) + pbar.desc = f"{desc}{nf} found, {nm} missing, {ne} empty, {nc} corrupt" + + pbar.close() + if msgs: + LOGGER.info('\n'.join(msgs)) + if nf == 0: + LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. {HELP_URL}') + x['hash'] = get_hash(self.seg_files + self.im_files) + x['results'] = nf, nm, ne, nc, len(self.im_files) + x['msgs'] = msgs # warnings + x['version'] = self.cache_version # cache version + try: + np.save(path, x) # save cache for next time + path.with_suffix('.cache.npy').rename(path) # remove .npy suffix + LOGGER.info(f'{prefix}New cache created: {path}') + except Exception as e: + LOGGER.warning(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # not writeable + return x + + @staticmethod + def collate_fn(batch): + img, label, path, shapes, masks, semantic_masks = zip(*batch) # transposed + batched_masks = torch.cat(masks, 0) + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks, torch.stack(semantic_masks, 0) + + + +def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (np.ndarray): [N, M], N is the number of polygons, + M is the number of points(Be divided by 2). + """ + mask = np.zeros(img_size, dtype=np.uint8) + polygons = np.asarray(polygons) + polygons = polygons.astype(np.int32) + shape = polygons.shape + polygons = polygons.reshape(shape[0], -1, 2) + cv2.fillPoly(mask, polygons, color=color) + nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) + # NOTE: fillPoly firstly then resize is trying the keep the same way + # of loss calculation when mask-ratio=1. + mask = cv2.resize(mask, (nw, nh)) + return mask + + +def polygons2masks(img_size, polygons, color, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (list[np.ndarray]): each polygon is [N, M], + N is the number of polygons, + M is the number of points(Be divided by 2). + """ + masks = [] + for si in range(len(polygons)): + mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio) + masks.append(mask) + return np.array(masks) + + +def polygons2masks_overlap(img_size, segments, downsample_ratio=1): + """Return a (640, 640) overlap mask.""" + masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), + dtype=np.int32 if len(segments) > 255 else np.uint8) + areas = [] + ms = [] + for si in range(len(segments)): + mask = polygon2mask( + img_size, + [segments[si].reshape(-1)], + downsample_ratio=downsample_ratio, + color=1, + ) + ms.append(mask) + areas.append(mask.sum()) + areas = np.asarray(areas) + index = np.argsort(-areas) + ms = np.array(ms)[index] + for i in range(len(segments)): + mask = ms[i] * (i + 1) + masks = masks + mask + masks = np.clip(masks, a_min=0, a_max=i + 1) + return masks, index diff --git a/utils/panoptic/general.py b/utils/panoptic/general.py new file mode 100644 index 0000000000000000000000000000000000000000..b526333dc5a1b8625d7e6a51ee6ba41818c62adb --- /dev/null +++ b/utils/panoptic/general.py @@ -0,0 +1,137 @@ +import cv2 +import numpy as np +import torch +import torch.nn.functional as F + + +def crop_mask(masks, boxes): + """ + "Crop" predicted masks by zeroing out everything not in the predicted bbox. + Vectorized by Chong (thanks Chong). + + Args: + - masks should be a size [h, w, n] tensor of masks + - boxes should be a size [n, 4] tensor of bbox coords in relative point form + """ + + n, h, w = masks.shape + x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) + r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) + c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) + + return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) + + +def process_mask_upsample(protos, masks_in, bboxes, shape): + """ + Crop after upsample. + proto_out: [mask_dim, mask_h, mask_w] + out_masks: [n, mask_dim], n is number of masks after nms + bboxes: [n, 4], n is number of masks after nms + shape:input_image_size, (h, w) + + return: h, w, n + """ + + c, mh, mw = protos.shape # CHW + masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) + masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW + masks = crop_mask(masks, bboxes) # CHW + return masks.gt_(0.5) + + +def process_mask(protos, masks_in, bboxes, shape, upsample=False): + """ + Crop before upsample. + proto_out: [mask_dim, mask_h, mask_w] + out_masks: [n, mask_dim], n is number of masks after nms + bboxes: [n, 4], n is number of masks after nms + shape:input_image_size, (h, w) + + return: h, w, n + """ + + c, mh, mw = protos.shape # CHW + ih, iw = shape + masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW + + downsampled_bboxes = bboxes.clone() + downsampled_bboxes[:, 0] *= mw / iw + downsampled_bboxes[:, 2] *= mw / iw + downsampled_bboxes[:, 3] *= mh / ih + downsampled_bboxes[:, 1] *= mh / ih + + masks = crop_mask(masks, downsampled_bboxes) # CHW + if upsample: + masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW + return masks.gt_(0.5) + + +def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): + """ + img1_shape: model input shape, [h, w] + img0_shape: origin pic shape, [h, w, 3] + masks: [h, w, num] + """ + # Rescale coordinates (xyxy) from im1_shape to im0_shape + if ratio_pad is None: # calculate from im0_shape + gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new + pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding + else: + pad = ratio_pad[1] + top, left = int(pad[1]), int(pad[0]) # y, x + bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0]) + + if len(masks.shape) < 2: + raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') + masks = masks[top:bottom, left:right] + # masks = masks.permute(2, 0, 1).contiguous() + # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0] + # masks = masks.permute(1, 2, 0).contiguous() + masks = cv2.resize(masks, (im0_shape[1], im0_shape[0])) + + if len(masks.shape) == 2: + masks = masks[:, :, None] + return masks + + +def mask_iou(mask1, mask2, eps=1e-7): + """ + mask1: [N, n] m1 means number of predicted objects + mask2: [M, n] m2 means number of gt objects + Note: n means image_w x image_h + + return: masks iou, [N, M] + """ + intersection = torch.matmul(mask1, mask2.t()).clamp(0) + union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection + return intersection / (union + eps) + + +def masks_iou(mask1, mask2, eps=1e-7): + """ + mask1: [N, n] m1 means number of predicted objects + mask2: [N, n] m2 means number of gt objects + Note: n means image_w x image_h + + return: masks iou, (N, ) + """ + intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) + union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection + return intersection / (union + eps) + + +def masks2segments(masks, strategy='largest'): + # Convert masks(n,160,160) into segments(n,xy) + segments = [] + for x in masks.int().cpu().numpy().astype('uint8'): + c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] + if c: + if strategy == 'concat': # concatenate all segments + c = np.concatenate([x.reshape(-1, 2) for x in c]) + elif strategy == 'largest': # select largest segment + c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) + else: + c = np.zeros((0, 2)) # no segments found + segments.append(c.astype('float32')) + return segments diff --git a/utils/panoptic/loss.py b/utils/panoptic/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..b45b2c27e0a05c275cbc50064288aece3ae3e856 --- /dev/null +++ b/utils/panoptic/loss.py @@ -0,0 +1,186 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..general import xywh2xyxy +from ..loss import FocalLoss, smooth_BCE +from ..metrics import bbox_iou +from ..torch_utils import de_parallel +from .general import crop_mask + + +class ComputeLoss: + # Compute losses + def __init__(self, model, autobalance=False, overlap=False): + self.sort_obj_iou = False + self.overlap = overlap + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + self.device = device + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) + BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets + + # Focal loss + g = h['fl_gamma'] # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance + self.na = m.na # number of anchors + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.nm = m.nm # number of masks + self.anchors = m.anchors + self.device = device + + def __call__(self, preds, targets, masks): # predictions, targets, model + p, proto = preds + bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width + lcls = torch.zeros(1, device=self.device) + lbox = torch.zeros(1, device=self.device) + lobj = torch.zeros(1, device=self.device) + lseg = torch.zeros(1, device=self.device) + tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets) # targets + + # Losses + for i, pi in enumerate(p): # layer index, layer predictions + b, a, gj, gi = indices[i] # image, anchor, gridy, gridx + tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj + + n = b.shape[0] # number of targets + if n: + pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1) # subset of predictions + + # Box regression + pxy = pxy.sigmoid() * 2 - 0.5 + pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i] + pbox = torch.cat((pxy, pwh), 1) # predicted box + iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target) + lbox += (1.0 - iou).mean() # iou loss + + # Objectness + iou = iou.detach().clamp(0).type(tobj.dtype) + if self.sort_obj_iou: + j = iou.argsort() + b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j] + if self.gr < 1: + iou = (1.0 - self.gr) + self.gr * iou + tobj[b, a, gj, gi] = iou # iou ratio + + # Classification + if self.nc > 1: # cls loss (only if multiple classes) + t = torch.full_like(pcls, self.cn, device=self.device) # targets + t[range(n), tcls[i]] = self.cp + lcls += self.BCEcls(pcls, t) # BCE + + # Mask regression + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0] + marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized + mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)) + for bi in b.unique(): + j = b == bi # matching index + if self.overlap: + mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0) + else: + mask_gti = masks[tidxs[i]][j] + lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j]) + + obji = self.BCEobj(pi[..., 4], tobj) + lobj += obji * self.balance[i] # obj loss + if self.autobalance: + self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + + if self.autobalance: + self.balance = [x / self.balance[self.ssi] for x in self.balance] + lbox *= self.hyp["box"] + lobj *= self.hyp["obj"] + lcls *= self.hyp["cls"] + lseg *= self.hyp["box"] / bs + + loss = lbox + lobj + lcls + lseg + return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() + + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): + # Mask loss for one image + pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) + loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") + return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() + + def build_targets(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + na, nt = self.na, targets.shape[0] # number of anchors, targets + tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], [] + gain = torch.ones(8, device=self.device) # normalized to gridspace gain + ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) + if self.overlap: + batch = p[0].shape[0] + ti = [] + for i in range(batch): + num = (targets[:, 0] == i).sum() # find number of targets of each image + ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) + ti = torch.cat(ti, 1) # (na, nt) + else: + ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1) + targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices + + g = 0.5 # bias + off = torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=self.device).float() * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + t = targets * gain # shape(3,n,7) + if nt: + # Matches + r = t[..., 4:6] / anchors[:, None] # wh ratio + j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare + # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) + t = t[j] # filter + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + j, k = ((gxy % 1 < g) & (gxy > 1)).T + l, m = ((gxi % 1 < g) & (gxi > 1)).T + j = torch.stack((torch.ones_like(j), j, k, l, m)) + t = t.repeat((5, 1, 1))[j] + offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] + else: + t = targets[0] + offsets = 0 + + # Define + bc, gxy, gwh, at = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors + (a, tidx), (b, c) = at.long().T, bc.long().T # anchors, image, class + gij = (gxy - offsets).long() + gi, gj = gij.T # grid indices + + # Append + indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid + tbox.append(torch.cat((gxy - gij, gwh), 1)) # box + anch.append(anchors[a]) # anchors + tcls.append(c) # class + tidxs.append(tidx) + xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6]) # xywh normalized + + return tcls, tbox, indices, anch, tidxs, xywhn diff --git a/utils/panoptic/loss_tal.py b/utils/panoptic/loss_tal.py new file mode 100644 index 0000000000000000000000000000000000000000..d8594395d4beada966859d578c6dc5476f948034 --- /dev/null +++ b/utils/panoptic/loss_tal.py @@ -0,0 +1,285 @@ +import os + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from torchvision.ops import sigmoid_focal_loss + +from utils.general import xywh2xyxy, xyxy2xywh +from utils.metrics import bbox_iou +from utils.panoptic.tal.anchor_generator import dist2bbox, make_anchors, bbox2dist +from utils.panoptic.tal.assigner import TaskAlignedAssigner +from utils.torch_utils import de_parallel +from utils.panoptic.general import crop_mask + + +def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 + # return positive, negative label smoothing BCE targets + return 1.0 - 0.5 * eps, 0.5 * eps + + +class VarifocalLoss(nn.Module): + # Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367 + def __init__(self): + super().__init__() + + def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0): + weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label + with torch.cuda.amp.autocast(enabled=False): + loss = (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), + reduction="none") * weight).sum() + return loss + + +class FocalLoss(nn.Module): + # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): + super().__init__() + self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() + self.gamma = gamma + self.alpha = alpha + self.reduction = loss_fcn.reduction + self.loss_fcn.reduction = "none" # required to apply FL to each element + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + # p_t = torch.exp(-loss) + # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability + + # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py + pred_prob = torch.sigmoid(pred) # prob from logits + p_t = true * pred_prob + (1 - true) * (1 - pred_prob) + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = (1.0 - p_t) ** self.gamma + loss *= alpha_factor * modulating_factor + + if self.reduction == "mean": + return loss.mean() + elif self.reduction == "sum": + return loss.sum() + else: # 'none' + return loss + + +class BboxLoss(nn.Module): + def __init__(self, reg_max, use_dfl=False): + super().__init__() + self.reg_max = reg_max + self.use_dfl = use_dfl + + def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask): + # iou loss + bbox_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4]) # (b, h*w, 4) + pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).view(-1, 4) + target_bboxes_pos = torch.masked_select(target_bboxes, bbox_mask).view(-1, 4) + bbox_weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1) + + iou = bbox_iou(pred_bboxes_pos, target_bboxes_pos, xywh=False, CIoU=True) + loss_iou = 1.0 - iou + + #### wiou + #iou = bbox_iou(pred_bboxes_pos, target_bboxes_pos, xywh=False, WIoU=True, scale=True) + #if type(iou) is tuple: + # if len(iou) == 2: + # loss_iou = (iou[1].detach() * (1 - iou[0])) + # iou = iou[0] + # else: + # loss_iou = (iou[0] * iou[1]) + # iou = iou[-1] + #else: + # loss_iou = (1.0 - iou) # iou loss + + loss_iou *= bbox_weight + loss_iou = loss_iou.sum() / target_scores_sum + # loss_iou = loss_iou.mean() + + # dfl loss + if self.use_dfl: + dist_mask = fg_mask.unsqueeze(-1).repeat([1, 1, (self.reg_max + 1) * 4]) + pred_dist_pos = torch.masked_select(pred_dist, dist_mask).view(-1, 4, self.reg_max + 1) + target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max) + target_ltrb_pos = torch.masked_select(target_ltrb, bbox_mask).view(-1, 4) + loss_dfl = self._df_loss(pred_dist_pos, target_ltrb_pos) * bbox_weight + loss_dfl = loss_dfl.sum() / target_scores_sum + else: + loss_dfl = torch.tensor(0.0).to(pred_dist.device) + + return loss_iou, loss_dfl, iou + + def _df_loss(self, pred_dist, target): + target_left = target.to(torch.long) + target_right = target_left + 1 + weight_left = target_right.to(torch.float) - target + weight_right = 1 - weight_left + loss_left = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_left.view(-1), reduction="none").view( + target_left.shape) * weight_left + loss_right = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_right.view(-1), + reduction="none").view(target_left.shape) * weight_right + return (loss_left + loss_right).mean(-1, keepdim=True) + + +class ComputeLoss: + # Compute losses + def __init__(self, model, use_dfl=True, overlap=True): + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none') + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets + + # Focal loss + g = h["fl_gamma"] # focal loss gamma + if g > 0: + BCEcls = FocalLoss(BCEcls, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.BCEcls = BCEcls + self.hyp = h + self.stride = m.stride # model strides + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.no = m.no + self.nm = m.nm + self.overlap = overlap + self.reg_max = m.reg_max + self.device = device + + self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.proj = torch.arange(m.reg_max).float().to(device) # / 120.0 + self.use_dfl = use_dfl + + def preprocess(self, targets, batch_size, scale_tensor): + if targets.shape[0] == 0: + out = torch.zeros(batch_size, 0, 5, device=self.device) + else: + i = targets[:, 0] # image index + _, counts = i.unique(return_counts=True) + out = torch.zeros(batch_size, counts.max(), 5, device=self.device) + for j in range(batch_size): + matches = i == j + n = matches.sum() + if n: + out[j, :n] = targets[matches, 1:] + out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) + return out + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) + return dist2bbox(pred_dist, anchor_points, xywh=False) + + def __call__(self, p, targets, masks, semasks, img=None, epoch=0): + loss = torch.zeros(6, device=self.device) # box, cls, dfl + feats, pred_masks, proto, psemasks = p if len(p) == 4 else p[1] + batch_size, _, mask_h, mask_w = proto.shape + pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + pred_masks = pred_masks.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + batch_size, grid_size = pred_scores.shape[:2] + imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # targets + try: + batch_idx = targets[:, 0].view(-1, 1) + targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + except RuntimeError as e: + raise TypeError('ERROR.') from e + + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + + target_labels, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( + pred_scores.detach().sigmoid(), + (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + + target_scores_sum = target_scores.sum() + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[2] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + + # bbox loss + if fg_mask.sum(): + loss[0], loss[3], _ = self.bbox_loss(pred_distri, + pred_bboxes, + anchor_points, + target_bboxes / stride_tensor, + target_scores, + target_scores_sum, + fg_mask) + + # masks loss + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] + + for i in range(batch_size): + if fg_mask[i].sum(): + mask_idx = target_gt_idx[i][fg_mask[i]] + if self.overlap: + gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0) + else: + gt_mask = masks[batch_idx.view(-1) == i][mask_idx] + xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]] + marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) + mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device) + loss[1] += self.single_mask_loss(gt_mask, pred_masks[i][fg_mask[i]], proto[i], mxyxy, + marea) # seg loss + # Semantic Segmentation + # focal loss + pt = torch.flatten(psemasks, start_dim = 2).permute(0, 2, 1) + gt = torch.flatten(semasks, start_dim = 2).permute(0, 2, 1) + + bs, _, _ = gt.shape + #torch.clamp(torch.sigmoid(logits), min=eps, max= 1 - eps) + #total_loss = (sigmoid_focal_loss(pt.float(), gt.float(), alpha = .25, gamma = 2., reduction = 'mean')) / 2. + #total_loss = (sigmoid_focal_loss(pt.clamp(-16., 16.), gt, alpha = .25, gamma = 2., reduction = 'mean')) / 2. + total_loss = (sigmoid_focal_loss(pt, gt, alpha = .25, gamma = 2., reduction = 'mean')) / 2. + loss[4] += total_loss * 20. + + # dice loss + pt = torch.flatten(psemasks.softmax(dim = 1)) + gt = torch.flatten(semasks) + + inter_mask = torch.sum(torch.mul(pt, gt)) + union_mask = torch.sum(torch.add(pt, gt)) + dice_coef = (2. * inter_mask + 1.) / (union_mask + 1.) + loss[5] += (1. - dice_coef) / 2. + + loss[0] *= 7.5 # box gain + loss[1] *= 2.5 / batch_size + loss[2] *= 0.5 # cls gain + loss[3] *= 1.5 # dfl gain + loss[4] *= 2.5 #/ batch_size + loss[5] *= 2.5 #/ batch_size + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) + + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): + # Mask loss for one image + pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n, 32) @ (32,80,80) -> (n,80,80) + loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none') + return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() diff --git a/utils/panoptic/metrics.py b/utils/panoptic/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..dcb8bc2d6df780a961ff1473de7d1e5f630d3e8d --- /dev/null +++ b/utils/panoptic/metrics.py @@ -0,0 +1,272 @@ +import numpy as np +import torch + +from ..metrics import ap_per_class + + +def fitness(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9, 0.1, 0.9] + return (x[:, :len(w)] * w).sum(1) + + +def ap_per_class_box_and_mask( + tp_m, + tp_b, + conf, + pred_cls, + target_cls, + plot=False, + save_dir=".", + names=(), +): + """ + Args: + tp_b: tp of boxes. + tp_m: tp of masks. + other arguments see `func: ap_per_class`. + """ + results_boxes = ap_per_class(tp_b, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Box")[2:] + results_masks = ap_per_class(tp_m, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Mask")[2:] + + results = { + "boxes": { + "p": results_boxes[0], + "r": results_boxes[1], + "ap": results_boxes[3], + "f1": results_boxes[2], + "ap_class": results_boxes[4]}, + "masks": { + "p": results_masks[0], + "r": results_masks[1], + "ap": results_masks[3], + "f1": results_masks[2], + "ap_class": results_masks[4]}} + return results + + +class Metric: + + def __init__(self) -> None: + self.p = [] # (nc, ) + self.r = [] # (nc, ) + self.f1 = [] # (nc, ) + self.all_ap = [] # (nc, 10) + self.ap_class_index = [] # (nc, ) + + @property + def ap50(self): + """AP@0.5 of all classes. + Return: + (nc, ) or []. + """ + return self.all_ap[:, 0] if len(self.all_ap) else [] + + @property + def ap(self): + """AP@0.5:0.95 + Return: + (nc, ) or []. + """ + return self.all_ap.mean(1) if len(self.all_ap) else [] + + @property + def mp(self): + """mean precision of all classes. + Return: + float. + """ + return self.p.mean() if len(self.p) else 0.0 + + @property + def mr(self): + """mean recall of all classes. + Return: + float. + """ + return self.r.mean() if len(self.r) else 0.0 + + @property + def map50(self): + """Mean AP@0.5 of all classes. + Return: + float. + """ + return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 + + @property + def map(self): + """Mean AP@0.5:0.95 of all classes. + Return: + float. + """ + return self.all_ap.mean() if len(self.all_ap) else 0.0 + + def mean_results(self): + """Mean of results, return mp, mr, map50, map""" + return (self.mp, self.mr, self.map50, self.map) + + def class_result(self, i): + """class-aware result, return p[i], r[i], ap50[i], ap[i]""" + return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) + + def get_maps(self, nc): + maps = np.zeros(nc) + self.map + for i, c in enumerate(self.ap_class_index): + maps[c] = self.ap[i] + return maps + + def update(self, results): + """ + Args: + results: tuple(p, r, ap, f1, ap_class) + """ + p, r, all_ap, f1, ap_class_index = results + self.p = p + self.r = r + self.all_ap = all_ap + self.f1 = f1 + self.ap_class_index = ap_class_index + + +class Metrics: + """Metric for boxes and masks.""" + + def __init__(self) -> None: + self.metric_box = Metric() + self.metric_mask = Metric() + + def update(self, results): + """ + Args: + results: Dict{'boxes': Dict{}, 'masks': Dict{}} + """ + self.metric_box.update(list(results["boxes"].values())) + self.metric_mask.update(list(results["masks"].values())) + + def mean_results(self): + return self.metric_box.mean_results() + self.metric_mask.mean_results() + + def class_result(self, i): + return self.metric_box.class_result(i) + self.metric_mask.class_result(i) + + def get_maps(self, nc): + return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) + + @property + def ap_class_index(self): + # boxes and masks have the same ap_class_index + return self.metric_box.ap_class_index + + +class Semantic_Metrics: + def __init__(self, nc, device): + self.nc = nc # number of classes + self.device = device + self.iou = [] + self.c_bit_counts = torch.zeros(nc, dtype = torch.long).to(device) + self.c_intersection_counts = torch.zeros(nc, dtype = torch.long).to(device) + self.c_union_counts = torch.zeros(nc, dtype = torch.long).to(device) + + def update(self, pred_masks, target_masks): + nb, nc, h, w = pred_masks.shape + device = pred_masks.device + + for b in range(nb): + onehot_mask = pred_masks[b].to(device) + # convert predict mask to one hot + semantic_mask = torch.flatten(onehot_mask, start_dim = 1).permute(1, 0) # class x h x w -> (h x w) x class + max_idx = semantic_mask.argmax(1) + output_masks = (torch.zeros(semantic_mask.shape).to(self.device)).scatter(1, max_idx.unsqueeze(1), 1.0) # one hot: (h x w) x class + output_masks = torch.reshape(output_masks.permute(1, 0), (nc, h, w)) # (h x w) x class -> class x h x w + onehot_mask = output_masks.int() + + for c in range(self.nc): + pred_mask = onehot_mask[c].to(device) + target_mask = target_masks[b, c].to(device) + + # calculate IoU + intersection = (torch.logical_and(pred_mask, target_mask).sum()).item() + union = (torch.logical_or(pred_mask, target_mask).sum()).item() + iou = 0. if (0 == union) else (intersection / union) + + # record class pixel counts, intersection counts, union counts + self.c_bit_counts[c] += target_mask.int().sum() + self.c_intersection_counts[c] += intersection + self.c_union_counts[c] += union + + self.iou.append(iou) + + def results(self): + # Mean IoU + miou = 0. if (0 == len(self.iou)) else np.sum(self.iou) / (len(self.iou) * self.nc) + + # Frequency Weighted IoU + c_iou = self.c_intersection_counts / (self.c_union_counts + 1) # add smooth + # c_bit_counts = self.c_bit_counts.astype(int) + total_c_bit_counts = self.c_bit_counts.sum() + freq_ious = torch.zeros(1, dtype = torch.long).to(self.device) if (0 == total_c_bit_counts) else (self.c_bit_counts / total_c_bit_counts) * c_iou + fwiou = (freq_ious.sum()).item() + + return (miou, fwiou) + + def reset(self): + self.iou = [] + self.c_bit_counts = torch.zeros(self.nc, dtype = torch.long).to(self.device) + self.c_intersection_counts = torch.zeros(self.nc, dtype = torch.long).to(self.device) + self.c_union_counts = torch.zeros(self.nc, dtype = torch.long).to(self.device) + + +KEYS = [ + "train/box_loss", + "train/seg_loss", # train loss + "train/cls_loss", + "train/dfl_loss", + "train/fcl_loss", + "train/dic_loss", + "metrics/precision(B)", + "metrics/recall(B)", + "metrics/mAP_0.5(B)", + "metrics/mAP_0.5:0.95(B)", # metrics + "metrics/precision(M)", + "metrics/recall(M)", + "metrics/mAP_0.5(M)", + "metrics/mAP_0.5:0.95(M)", # metrics + "metrics/MIOUS(S)", + "metrics/FWIOUS(S)", # metrics + "val/box_loss", + "val/seg_loss", # val loss + "val/cls_loss", + "val/dfl_loss", + "val/fcl_loss", + "val/dic_loss", + "x/lr0", + "x/lr1", + "x/lr2",] + +BEST_KEYS = [ + "best/epoch", + "best/precision(B)", + "best/recall(B)", + "best/mAP_0.5(B)", + "best/mAP_0.5:0.95(B)", + "best/precision(M)", + "best/recall(M)", + "best/mAP_0.5(M)", + "best/mAP_0.5:0.95(M)", + "best/MIOUS(S)", + "best/FWIOUS(S)",] diff --git a/utils/panoptic/plots.py b/utils/panoptic/plots.py new file mode 100644 index 0000000000000000000000000000000000000000..55d87b7950554aa803d160ad8d20205aef37dc9c --- /dev/null +++ b/utils/panoptic/plots.py @@ -0,0 +1,164 @@ +import contextlib +import math +from pathlib import Path + +import cv2 +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import torch +from torchvision.utils import draw_segmentation_masks, save_image + +from .. import threaded +from ..general import xywh2xyxy +from ..plots import Annotator, colors + + +@threaded +def plot_images_and_masks(images, targets, masks, semasks, paths=None, fname='images.jpg', names=None): + + try: + if images.shape[-2:] != semasks.shape[-2:]: + m = torch.nn.Upsample(scale_factor=4, mode='nearest') + semasks = m(semasks) + + for idx in range(images.shape[0]): + output_img = draw_segmentation_masks( + image = images[idx, :, :, :].cpu().to(dtype = torch.uint8), + masks = semasks[idx, :, :, :].cpu().to(dtype = torch.bool), + alpha = 1) + cv2.imwrite( + '{}_{}.jpg'.format(fname, idx), + torch.permute(output_img, (1, 2, 0)).numpy() + ) + except: + pass + + # Plot image grid with labels + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + if isinstance(masks, torch.Tensor): + masks = masks.cpu().numpy().astype(int) + if isinstance(semasks, torch.Tensor): + semasks = semasks.cpu().numpy().astype(int) + + max_size = 1920 # max image size + max_subplots = 16 # max image subplots, i.e. 4x4 + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + if np.max(images[0]) <= 1: + images *= 255 # de-normalise (optional) + + # Build Image + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, im in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y:y + h, x:x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames + if len(targets) > 0: + idx = targets[:, 0] == i + ti = targets[idx] # image targets + + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype('int') + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) + + if boxes.shape[1]: + if boxes.max() <= 1.01: # if normalized with tolerance 0.01 + boxes[[0, 2]] *= w # scale to pixels + boxes[[1, 3]] *= h + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] + color = colors(cls) + cls = names[cls] if names else cls + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' + annotator.box_label(box, label, color=color) + + # Plot masks + if len(masks): + if masks.max() > 1.0: # mean that masks are overlap + image_masks = masks[[i]] # (1, 640, 640) + nl = len(ti) + index = np.arange(nl).reshape(nl, 1, 1) + 1 + image_masks = np.repeat(image_masks, nl, axis=0) + image_masks = np.where(image_masks == index, 1.0, 0.0) + else: + image_masks = masks[idx] + + im = np.asarray(annotator.im).copy() + for j, box in enumerate(boxes.T.tolist()): + if labels or conf[j] > 0.25: # 0.25 conf thresh + color = colors(classes[j]) + mh, mw = image_masks[j].shape + if mh != h or mw != w: + mask = image_masks[j].astype(np.uint8) + mask = cv2.resize(mask, (w, h)) + mask = mask.astype(bool) + else: + mask = image_masks[j].astype(bool) + with contextlib.suppress(Exception): + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + annotator.fromarray(im) + annotator.im.save(fname) # save + + +def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." + for f in files: + try: + data = pd.read_csv(f) + index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + + 0.1 * data.values[:, 11]) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") + # if j in [8, 9, 10]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() diff --git a/utils/panoptic/tal/__init__.py b/utils/panoptic/tal/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..84952a8167bc2975913a6def6b4f027d566552a9 --- /dev/null +++ b/utils/panoptic/tal/__init__.py @@ -0,0 +1 @@ +# init \ No newline at end of file diff --git a/utils/panoptic/tal/anchor_generator.py b/utils/panoptic/tal/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..0de163651e21225445097f90e05a6c6d8ff10092 --- /dev/null +++ b/utils/panoptic/tal/anchor_generator.py @@ -0,0 +1,38 @@ +import torch + +from utils.general import check_version + +TORCH_1_10 = check_version(torch.__version__, '1.10.0') + + +def make_anchors(feats, strides, grid_cell_offset=0.5): + """Generate anchors from features.""" + anchor_points, stride_tensor = [], [] + assert feats is not None + dtype, device = feats[0].dtype, feats[0].device + for i, stride in enumerate(strides): + _, _, h, w = feats[i].shape + sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x + sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y + sy, sx = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx) + anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) + stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device)) + return torch.cat(anchor_points), torch.cat(stride_tensor) + + +def dist2bbox(distance, anchor_points, xywh=True, dim=-1): + """Transform distance(ltrb) to box(xywh or xyxy).""" + lt, rb = torch.split(distance, 2, dim) + x1y1 = anchor_points - lt + x2y2 = anchor_points + rb + if xywh: + c_xy = (x1y1 + x2y2) / 2 + wh = x2y2 - x1y1 + return torch.cat((c_xy, wh), dim) # xywh bbox + return torch.cat((x1y1, x2y2), dim) # xyxy bbox + + +def bbox2dist(anchor_points, bbox, reg_max): + """Transform bbox(xyxy) to dist(ltrb).""" + x1y1, x2y2 = torch.split(bbox, 2, -1) + return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp(0, reg_max - 0.01) # dist (lt, rb) diff --git a/utils/panoptic/tal/assigner.py b/utils/panoptic/tal/assigner.py new file mode 100644 index 0000000000000000000000000000000000000000..8c61f5c0508b87522eb4cf048bbe72973dbb4be4 --- /dev/null +++ b/utils/panoptic/tal/assigner.py @@ -0,0 +1,181 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from utils.metrics import bbox_iou + + +def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9): + """select the positive anchor center in gt + + Args: + xy_centers (Tensor): shape(h*w, 4) + gt_bboxes (Tensor): shape(b, n_boxes, 4) + Return: + (Tensor): shape(b, n_boxes, h*w) + """ + n_anchors = xy_centers.shape[0] + bs, n_boxes, _ = gt_bboxes.shape + lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2) # left-top, right-bottom + bbox_deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(bs, n_boxes, n_anchors, -1) + # return (bbox_deltas.min(3)[0] > eps).to(gt_bboxes.dtype) + return bbox_deltas.amin(3).gt_(eps) + + +def select_highest_overlaps(mask_pos, overlaps, n_max_boxes): + """if an anchor box is assigned to multiple gts, + the one with the highest iou will be selected. + + Args: + mask_pos (Tensor): shape(b, n_max_boxes, h*w) + overlaps (Tensor): shape(b, n_max_boxes, h*w) + Return: + target_gt_idx (Tensor): shape(b, h*w) + fg_mask (Tensor): shape(b, h*w) + mask_pos (Tensor): shape(b, n_max_boxes, h*w) + """ + # (b, n_max_boxes, h*w) -> (b, h*w) + fg_mask = mask_pos.sum(-2) + if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes + mask_multi_gts = (fg_mask.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) # (b, n_max_boxes, h*w) + max_overlaps_idx = overlaps.argmax(1) # (b, h*w) + is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) # (b, h*w, n_max_boxes) + is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) # (b, n_max_boxes, h*w) + mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos) # (b, n_max_boxes, h*w) + fg_mask = mask_pos.sum(-2) + # find each grid serve which gt(index) + target_gt_idx = mask_pos.argmax(-2) # (b, h*w) + return target_gt_idx, fg_mask, mask_pos + + +class TaskAlignedAssigner(nn.Module): + def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9): + super().__init__() + self.topk = topk + self.num_classes = num_classes + self.bg_idx = num_classes + self.alpha = alpha + self.beta = beta + self.eps = eps + + @torch.no_grad() + def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt): + """This code referenced to + https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py + + Args: + pd_scores (Tensor): shape(bs, num_total_anchors, num_classes) + pd_bboxes (Tensor): shape(bs, num_total_anchors, 4) + anc_points (Tensor): shape(num_total_anchors, 2) + gt_labels (Tensor): shape(bs, n_max_boxes, 1) + gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) + mask_gt (Tensor): shape(bs, n_max_boxes, 1) + Returns: + target_labels (Tensor): shape(bs, num_total_anchors) + target_bboxes (Tensor): shape(bs, num_total_anchors, 4) + target_scores (Tensor): shape(bs, num_total_anchors, num_classes) + fg_mask (Tensor): shape(bs, num_total_anchors) + """ + self.bs = pd_scores.size(0) + self.n_max_boxes = gt_bboxes.size(1) + + if self.n_max_boxes == 0: + device = gt_bboxes.device + return (torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), + torch.zeros_like(pd_bboxes).to(device), + torch.zeros_like(pd_scores).to(device), + torch.zeros_like(pd_scores[..., 0]).to(device), + torch.zeros_like(pd_scores[..., 0]).to(device)) + + mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, + mask_gt) + + target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes) + + # assigned target + target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask) + + # normalize + align_metric *= mask_pos + pos_align_metrics = align_metric.amax(axis=-1, keepdim=True) # b, max_num_obj + pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True) # b, max_num_obj + norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1) + target_scores = target_scores * norm_align_metric + + return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx + + def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt): + + # get anchor_align metric, (b, max_num_obj, h*w) + align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes) + # get in_gts mask, (b, max_num_obj, h*w) + mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes) + # get topk_metric mask, (b, max_num_obj, h*w) + mask_topk = self.select_topk_candidates(align_metric * mask_in_gts, + topk_mask=mask_gt.repeat([1, 1, self.topk]).bool()) + # merge all mask to a final mask, (b, max_num_obj, h*w) + mask_pos = mask_topk * mask_in_gts * mask_gt + + return mask_pos, align_metric, overlaps + + def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes): + + gt_labels = gt_labels.to(torch.long) # b, max_num_obj, 1 + ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) # 2, b, max_num_obj + ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) # b, max_num_obj + ind[1] = gt_labels.squeeze(-1) # b, max_num_obj + # get the scores of each grid for each gt cls + bbox_scores = pd_scores[ind[0], :, ind[1]] # b, max_num_obj, h*w + + overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False, CIoU=True).squeeze(3).clamp(0) + #overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False, WIoU=True, scale=True)[-1].squeeze(3).clamp(0) + align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta) + return align_metric, overlaps + + def select_topk_candidates(self, metrics, largest=True, topk_mask=None): + """ + Args: + metrics: (b, max_num_obj, h*w). + topk_mask: (b, max_num_obj, topk) or None + """ + + num_anchors = metrics.shape[-1] # h*w + # (b, max_num_obj, topk) + topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest) + if topk_mask is None: + topk_mask = (topk_metrics.max(-1, keepdim=True) > self.eps).tile([1, 1, self.topk]) + # (b, max_num_obj, topk) + topk_idxs = torch.where(topk_mask, topk_idxs, 0) + # (b, max_num_obj, topk, h*w) -> (b, max_num_obj, h*w) + is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(-2) + # filter invalid bboxes + # assigned topk should be unique, this is for dealing with empty labels + # since empty labels will generate index `0` through `F.one_hot` + # NOTE: but what if the topk_idxs include `0`? + is_in_topk = torch.where(is_in_topk > 1, 0, is_in_topk) + return is_in_topk.to(metrics.dtype) + + def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask): + """ + Args: + gt_labels: (b, max_num_obj, 1) + gt_bboxes: (b, max_num_obj, 4) + target_gt_idx: (b, h*w) + fg_mask: (b, h*w) + """ + + # assigned target labels, (b, 1) + batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None] + target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes # (b, h*w) + target_labels = gt_labels.long().flatten()[target_gt_idx] # (b, h*w) + + # assigned target boxes, (b, max_num_obj, 4) -> (b, h*w) + target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx] + + # assigned target scores + target_labels.clamp(0) + target_scores = F.one_hot(target_labels, self.num_classes) # (b, h*w, 80) + fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes) # (b, h*w, 80) + target_scores = torch.where(fg_scores_mask > 0, target_scores, 0) + + return target_labels, target_bboxes, target_scores diff --git a/utils/plots.py b/utils/plots.py new file mode 100644 index 0000000000000000000000000000000000000000..fa49dc19d7f4b445a76de1e2ee135defa95778e3 --- /dev/null +++ b/utils/plots.py @@ -0,0 +1,570 @@ +import contextlib +import math +import os +from copy import copy +from pathlib import Path +from urllib.error import URLError + +import cv2 +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sn +import torch +from PIL import Image, ImageDraw, ImageFont + +from utils import TryExcept, threaded +from utils.general import (CONFIG_DIR, FONT, LOGGER, check_font, check_requirements, clip_boxes, increment_path, + is_ascii, xywh2xyxy, xyxy2xywh) +from utils.metrics import fitness +from utils.segment.general import scale_image + +# Settings +RANK = int(os.getenv('RANK', -1)) +matplotlib.rc('font', **{'size': 11}) +matplotlib.use('Agg') # for writing to files only + + +class Colors: + # Ultralytics color palette https://ultralytics.com/ + def __init__(self): + # hex = matplotlib.colors.TABLEAU_COLORS.values() + hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB', + '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7') + self.palette = [self.hex2rgb(f'#{c}') for c in hexs] + self.n = len(self.palette) + + def __call__(self, i, bgr=False): + c = self.palette[int(i) % self.n] + return (c[2], c[1], c[0]) if bgr else c + + @staticmethod + def hex2rgb(h): # rgb order (PIL) + return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) + + +colors = Colors() # create instance for 'from utils.plots import colors' + + +def check_pil_font(font=FONT, size=10): + # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary + font = Path(font) + font = font if font.exists() else (CONFIG_DIR / font.name) + try: + return ImageFont.truetype(str(font) if font.exists() else font.name, size) + except Exception: # download if missing + try: + check_font(font) + return ImageFont.truetype(str(font), size) + except TypeError: + check_requirements('Pillow>=8.4.0') # known issue https://github.com/ultralytics/yolov5/issues/5374 + except URLError: # not online + return ImageFont.load_default() + + +class Annotator: + # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations + def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'): + assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.' + non_ascii = not is_ascii(example) # non-latin labels, i.e. asian, arabic, cyrillic + self.pil = pil or non_ascii + if self.pil: # use PIL + self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) + self.draw = ImageDraw.Draw(self.im) + self.font = check_pil_font(font='Arial.Unicode.ttf' if non_ascii else font, + size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12)) + else: # use cv2 + self.im = im + self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2) # line width + + def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)): + # Add one xyxy box to image with label + if self.pil or not is_ascii(label): + self.draw.rectangle(box, width=self.lw, outline=color) # box + if label: + w, h = self.font.getsize(label) # text width, height + outside = box[1] - h >= 0 # label fits outside box + self.draw.rectangle( + (box[0], box[1] - h if outside else box[1], box[0] + w + 1, + box[1] + 1 if outside else box[1] + h + 1), + fill=color, + ) + # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls') # for PIL>8.0 + self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font) + else: # cv2 + p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA) + if label: + tf = max(self.lw - 1, 1) # font thickness + w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0] # text width, height + outside = p1[1] - h >= 3 + p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3 + cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA) # filled + cv2.putText(self.im, + label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), + 0, + self.lw / 3, + txt_color, + thickness=tf, + lineType=cv2.LINE_AA) + + def masks(self, masks, colors, im_gpu=None, alpha=0.5): + """Plot masks at once. + Args: + masks (tensor): predicted masks on cuda, shape: [n, h, w] + colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n] + im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1] + alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque + """ + if self.pil: + # convert to numpy first + self.im = np.asarray(self.im).copy() + if im_gpu is None: + # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...) + if len(masks) == 0: + return + if isinstance(masks, torch.Tensor): + masks = torch.as_tensor(masks, dtype=torch.uint8) + masks = masks.permute(1, 2, 0).contiguous() + masks = masks.cpu().numpy() + # masks = np.ascontiguousarray(masks.transpose(1, 2, 0)) + masks = scale_image(masks.shape[:2], masks, self.im.shape) + masks = np.asarray(masks, dtype=np.float32) + colors = np.asarray(colors, dtype=np.float32) # shape(n,3) + s = masks.sum(2, keepdims=True).clip(0, 1) # add all masks together + masks = (masks @ colors).clip(0, 255) # (h,w,n) @ (n,3) = (h,w,3) + self.im[:] = masks * alpha + self.im * (1 - s * alpha) + else: + if len(masks) == 0: + self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255 + colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0 + colors = colors[:, None, None] # shape(n,1,1,3) + masks = masks.unsqueeze(3) # shape(n,h,w,1) + masks_color = masks * (colors * alpha) # shape(n,h,w,3) + + inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1) + mcs = (masks_color * inv_alph_masks).sum(0) * 2 # mask color summand shape(n,h,w,3) + + im_gpu = im_gpu.flip(dims=[0]) # flip channel + im_gpu = im_gpu.permute(1, 2, 0).contiguous() # shape(h,w,3) + im_gpu = im_gpu * inv_alph_masks[-1] + mcs + im_mask = (im_gpu * 255).byte().cpu().numpy() + self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape) + if self.pil: + # convert im back to PIL and update draw + self.fromarray(self.im) + + def rectangle(self, xy, fill=None, outline=None, width=1): + # Add rectangle to image (PIL-only) + self.draw.rectangle(xy, fill, outline, width) + + def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'): + # Add text to image (PIL-only) + if anchor == 'bottom': # start y from font bottom + w, h = self.font.getsize(text) # text width, height + xy[1] += 1 - h + self.draw.text(xy, text, fill=txt_color, font=self.font) + + def fromarray(self, im): + # Update self.im from a numpy array + self.im = im if isinstance(im, Image.Image) else Image.fromarray(im) + self.draw = ImageDraw.Draw(self.im) + + def result(self): + # Return annotated image as array + return np.asarray(self.im) + + +def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')): + """ + x: Features to be visualized + module_type: Module type + stage: Module stage within model + n: Maximum number of feature maps to plot + save_dir: Directory to save results + """ + if 'Detect' not in module_type: + batch, channels, height, width = x.shape # batch, channels, height, width + if height > 1 and width > 1: + f = save_dir / f"stage{stage}_{module_type.split('.')[-1]}_features.png" # filename + + blocks = torch.chunk(x[0].cpu(), channels, dim=0) # select batch index 0, block by channels + n = min(n, channels) # number of plots + fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True) # 8 rows x n/8 cols + ax = ax.ravel() + plt.subplots_adjust(wspace=0.05, hspace=0.05) + for i in range(n): + ax[i].imshow(blocks[i].squeeze()) # cmap='gray' + ax[i].axis('off') + + LOGGER.info(f'Saving {f}... ({n}/{channels})') + plt.savefig(f, dpi=300, bbox_inches='tight') + plt.close() + np.save(str(f.with_suffix('.npy')), x[0].cpu().numpy()) # npy save + + +def hist2d(x, y, n=100): + # 2d histogram used in labels.png and evolve.png + xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n) + hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges)) + xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1) + yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1) + return np.log(hist[xidx, yidx]) + + +def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5): + from scipy.signal import butter, filtfilt + + # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy + def butter_lowpass(cutoff, fs, order): + nyq = 0.5 * fs + normal_cutoff = cutoff / nyq + return butter(order, normal_cutoff, btype='low', analog=False) + + b, a = butter_lowpass(cutoff, fs, order=order) + return filtfilt(b, a, data) # forward-backward filter + + +def output_to_target(output, max_det=300): + # Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting + targets = [] + for i, o in enumerate(output): + box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1) + j = torch.full((conf.shape[0], 1), i) + targets.append(torch.cat((j, cls, xyxy2xywh(box), conf), 1)) + return torch.cat(targets, 0).numpy() + + +@threaded +def plot_images(images, targets, paths=None, fname='images.jpg', names=None): + # Plot image grid with labels + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + + max_size = 1920 # max image size + max_subplots = 16 # max image subplots, i.e. 4x4 + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + if np.max(images[0]) <= 1: + images *= 255 # de-normalise (optional) + + # Build Image + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, im in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y:y + h, x:x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text((x + 5, y + 5), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames + if len(targets) > 0: + ti = targets[targets[:, 0] == i] # image targets + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype('int') + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) + + if boxes.shape[1]: + if boxes.max() <= 1.01: # if normalized with tolerance 0.01 + boxes[[0, 2]] *= w # scale to pixels + boxes[[1, 3]] *= h + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] + color = colors(cls) + cls = names[cls] if names else cls + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' + annotator.box_label(box, label, color=color) + annotator.im.save(fname) # save + + +def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''): + # Plot LR simulating training for full epochs + optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals + y = [] + for _ in range(epochs): + scheduler.step() + y.append(optimizer.param_groups[0]['lr']) + plt.plot(y, '.-', label='LR') + plt.xlabel('epoch') + plt.ylabel('LR') + plt.grid() + plt.xlim(0, epochs) + plt.ylim(0) + plt.savefig(Path(save_dir) / 'LR.png', dpi=200) + plt.close() + + +def plot_val_txt(): # from utils.plots import *; plot_val() + # Plot val.txt histograms + x = np.loadtxt('val.txt', dtype=np.float32) + box = xyxy2xywh(x[:, :4]) + cx, cy = box[:, 0], box[:, 1] + + fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True) + ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0) + ax.set_aspect('equal') + plt.savefig('hist2d.png', dpi=300) + + fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True) + ax[0].hist(cx, bins=600) + ax[1].hist(cy, bins=600) + plt.savefig('hist1d.png', dpi=200) + + +def plot_targets_txt(): # from utils.plots import *; plot_targets_txt() + # Plot targets.txt histograms + x = np.loadtxt('targets.txt', dtype=np.float32).T + s = ['x targets', 'y targets', 'width targets', 'height targets'] + fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True) + ax = ax.ravel() + for i in range(4): + ax[i].hist(x[i], bins=100, label=f'{x[i].mean():.3g} +/- {x[i].std():.3g}') + ax[i].legend() + ax[i].set_title(s[i]) + plt.savefig('targets.jpg', dpi=200) + + +def plot_val_study(file='', dir='', x=None): # from utils.plots import *; plot_val_study() + # Plot file=study.txt generated by val.py (or plot all study*.txt in dir) + save_dir = Path(file).parent if file else Path(dir) + plot2 = False # plot additional results + if plot2: + ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)[1].ravel() + + fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True) + # for f in [save_dir / f'study_coco_{x}.txt' for x in ['yolov5n6', 'yolov5s6', 'yolov5m6', 'yolov5l6', 'yolov5x6']]: + for f in sorted(save_dir.glob('study*.txt')): + y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T + x = np.arange(y.shape[1]) if x is None else np.array(x) + if plot2: + s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_preprocess (ms/img)', 't_inference (ms/img)', 't_NMS (ms/img)'] + for i in range(7): + ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8) + ax[i].set_title(s[i]) + + j = y[3].argmax() + 1 + ax2.plot(y[5, 1:j], + y[3, 1:j] * 1E2, + '.-', + linewidth=2, + markersize=8, + label=f.stem.replace('study_coco_', '').replace('yolo', 'YOLO')) + + ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5], + 'k.-', + linewidth=2, + markersize=8, + alpha=.25, + label='EfficientDet') + + ax2.grid(alpha=0.2) + ax2.set_yticks(np.arange(20, 60, 5)) + ax2.set_xlim(0, 57) + ax2.set_ylim(25, 55) + ax2.set_xlabel('GPU Speed (ms/img)') + ax2.set_ylabel('COCO AP val') + ax2.legend(loc='lower right') + f = save_dir / 'study.png' + print(f'Saving {f}...') + plt.savefig(f, dpi=300) + + +@TryExcept() # known issue https://github.com/ultralytics/yolov5/issues/5395 +def plot_labels(labels, names=(), save_dir=Path('')): + # plot dataset labels + LOGGER.info(f"Plotting labels to {save_dir / 'labels.jpg'}... ") + c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes + nc = int(c.max() + 1) # number of classes + x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height']) + + # seaborn correlogram + sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9)) + plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200) + plt.close() + + # matplotlib labels + matplotlib.use('svg') # faster + ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel() + y = ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8) + with contextlib.suppress(Exception): # color histogram bars by class + [y[2].patches[i].set_color([x / 255 for x in colors(i)]) for i in range(nc)] # known issue #3195 + ax[0].set_ylabel('instances') + if 0 < len(names) < 30: + ax[0].set_xticks(range(len(names))) + ax[0].set_xticklabels(list(names.values()), rotation=90, fontsize=10) + else: + ax[0].set_xlabel('classes') + sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9) + sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9) + + # rectangles + labels[:, 1:3] = 0.5 # center + labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000 + img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255) + for cls, *box in labels[:1000]: + ImageDraw.Draw(img).rectangle(box, width=1, outline=colors(cls)) # plot + ax[1].imshow(img) + ax[1].axis('off') + + for a in [0, 1, 2, 3]: + for s in ['top', 'right', 'left', 'bottom']: + ax[a].spines[s].set_visible(False) + + plt.savefig(save_dir / 'labels.jpg', dpi=200) + matplotlib.use('Agg') + plt.close() + + +def imshow_cls(im, labels=None, pred=None, names=None, nmax=25, verbose=False, f=Path('images.jpg')): + # Show classification image grid with labels (optional) and predictions (optional) + from utils.augmentations import denormalize + + names = names or [f'class{i}' for i in range(1000)] + blocks = torch.chunk(denormalize(im.clone()).cpu().float(), len(im), + dim=0) # select batch index 0, block by channels + n = min(len(blocks), nmax) # number of plots + m = min(8, round(n ** 0.5)) # 8 x 8 default + fig, ax = plt.subplots(math.ceil(n / m), m) # 8 rows x n/8 cols + ax = ax.ravel() if m > 1 else [ax] + # plt.subplots_adjust(wspace=0.05, hspace=0.05) + for i in range(n): + ax[i].imshow(blocks[i].squeeze().permute((1, 2, 0)).numpy().clip(0.0, 1.0)) + ax[i].axis('off') + if labels is not None: + s = names[labels[i]] + (f'—{names[pred[i]]}' if pred is not None else '') + ax[i].set_title(s, fontsize=8, verticalalignment='top') + plt.savefig(f, dpi=300, bbox_inches='tight') + plt.close() + if verbose: + LOGGER.info(f"Saving {f}") + if labels is not None: + LOGGER.info('True: ' + ' '.join(f'{names[i]:3s}' for i in labels[:nmax])) + if pred is not None: + LOGGER.info('Predicted:' + ' '.join(f'{names[i]:3s}' for i in pred[:nmax])) + return f + + +def plot_evolve(evolve_csv='path/to/evolve.csv'): # from utils.plots import *; plot_evolve() + # Plot evolve.csv hyp evolution results + evolve_csv = Path(evolve_csv) + data = pd.read_csv(evolve_csv) + keys = [x.strip() for x in data.columns] + x = data.values + f = fitness(x) + j = np.argmax(f) # max fitness index + plt.figure(figsize=(10, 12), tight_layout=True) + matplotlib.rc('font', **{'size': 8}) + print(f'Best results from row {j} of {evolve_csv}:') + for i, k in enumerate(keys[7:]): + v = x[:, 7 + i] + mu = v[j] # best single result + plt.subplot(6, 5, i + 1) + plt.scatter(v, f, c=hist2d(v, f, 20), cmap='viridis', alpha=.8, edgecolors='none') + plt.plot(mu, f.max(), 'k+', markersize=15) + plt.title(f'{k} = {mu:.3g}', fontdict={'size': 9}) # limit to 40 characters + if i % 5 != 0: + plt.yticks([]) + print(f'{k:>15}: {mu:.3g}') + f = evolve_csv.with_suffix('.png') # filename + plt.savefig(f, dpi=200) + plt.close() + print(f'Saved {f}') + + +def plot_results(file='path/to/results.csv', dir=''): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob('results*.csv')) + assert len(files), f'No results.csv files found in {save_dir.resolve()}, nothing to plot.' + for f in files: + try: + data = pd.read_csv(f) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 8, 9, 10, 6, 7]): + y = data.values[:, j].astype('float') + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker='.', label=f.stem, linewidth=2, markersize=8) + ax[i].set_title(s[j], fontsize=12) + # if j in [8, 9, 10]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + LOGGER.info(f'Warning: Plotting error for {f}: {e}') + ax[1].legend() + fig.savefig(save_dir / 'results.png', dpi=200) + plt.close() + + +def profile_idetection(start=0, stop=0, labels=(), save_dir=''): + # Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection() + ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel() + s = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)', 'real-world FPS'] + files = list(Path(save_dir).glob('frames*.txt')) + for fi, f in enumerate(files): + try: + results = np.loadtxt(f, ndmin=2).T[:, 90:-30] # clip first and last rows + n = results.shape[1] # number of rows + x = np.arange(start, min(stop, n) if stop else n) + results = results[:, x] + t = (results[0] - results[0].min()) # set t0=0s + results[0] = x + for i, a in enumerate(ax): + if i < len(results): + label = labels[fi] if len(labels) else f.stem.replace('frames_', '') + a.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5) + a.set_title(s[i]) + a.set_xlabel('time (s)') + # if fi == len(files) - 1: + # a.set_ylim(bottom=0) + for side in ['top', 'right']: + a.spines[side].set_visible(False) + else: + a.remove() + except Exception as e: + print(f'Warning: Plotting error for {f}; {e}') + ax[1].legend() + plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200) + + +def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True): + # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop + xyxy = torch.tensor(xyxy).view(-1, 4) + b = xyxy2xywh(xyxy) # boxes + if square: + b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # attempt rectangle to square + b[:, 2:] = b[:, 2:] * gain + pad # box wh * gain + pad + xyxy = xywh2xyxy(b).long() + clip_boxes(xyxy, im.shape) + crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)] + if save: + file.parent.mkdir(parents=True, exist_ok=True) # make directory + f = str(increment_path(file).with_suffix('.jpg')) + # cv2.imwrite(f, crop) # save BGR, https://github.com/ultralytics/yolov5/issues/7007 chroma subsampling issue + Image.fromarray(crop[..., ::-1]).save(f, quality=95, subsampling=0) # save RGB + return crop diff --git a/utils/segment/__init__.py b/utils/segment/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..84952a8167bc2975913a6def6b4f027d566552a9 --- /dev/null +++ b/utils/segment/__init__.py @@ -0,0 +1 @@ +# init \ No newline at end of file diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py new file mode 100644 index 0000000000000000000000000000000000000000..34b5bf75f9feb860270ff8502360609408c64b72 --- /dev/null +++ b/utils/segment/augmentations.py @@ -0,0 +1,99 @@ +import math +import random + +import cv2 +import numpy as np + +from ..augmentations import box_candidates +from ..general import resample_segments, segment2box + + +def mixup(im, labels, segments, im2, labels2, segments2): + # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf + r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0 + im = (im * r + im2 * (1 - r)).astype(np.uint8) + labels = np.concatenate((labels, labels2), 0) + segments = np.concatenate((segments, segments2), 0) + return im, labels, segments + + +def random_perspective(im, + targets=(), + segments=(), + degrees=10, + translate=.1, + scale=.1, + shear=10, + perspective=0.0, + border=(0, 0)): + # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) + # targets = [cls, xyxy] + + height = im.shape[0] + border[0] * 2 # shape(h,w,c) + width = im.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -im.shape[1] / 2 # x translation (pixels) + C[1, 2] = -im.shape[0] / 2 # y translation (pixels) + + # Perspective + P = np.eye(3) + P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(1 - scale, 1 + scale) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * width) # x translation (pixels) + T[1, 2] = (random.uniform(0.5 - translate, 0.5 + translate) * height) # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114)) + else: # affine + im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114)) + + # Visualize + # import matplotlib.pyplot as plt + # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel() + # ax[0].imshow(im[:, :, ::-1]) # base + # ax[1].imshow(im2[:, :, ::-1]) # warped + + # Transform label coordinates + n = len(targets) + new_segments = [] + if n: + new = np.zeros((n, 4)) + segments = resample_segments(segments) # upsample + for i, segment in enumerate(segments): + xy = np.ones((len(segment), 3)) + xy[:, :2] = segment + xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]) # perspective rescale or affine + + # clip + new[i] = segment2box(xy, width, height) + new_segments.append(xy) + + # filter candidates + i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01) + targets = targets[i] + targets[:, 1:5] = new[i] + new_segments = np.array(new_segments)[i] + + return im, targets, new_segments diff --git a/utils/segment/dataloaders.py b/utils/segment/dataloaders.py new file mode 100644 index 0000000000000000000000000000000000000000..335570a63d330696612d87b51d2bd8ae0541c37b --- /dev/null +++ b/utils/segment/dataloaders.py @@ -0,0 +1,328 @@ +import os +import random + +import cv2 +import numpy as np +import torch +from torch.utils.data import DataLoader, distributed + +from ..augmentations import augment_hsv, copy_paste, letterbox +from ..dataloaders import InfiniteDataLoader, LoadImagesAndLabels, seed_worker +from ..general import LOGGER, xyn2xy, xywhn2xyxy, xyxy2xywhn +from ..torch_utils import torch_distributed_zero_first +from .augmentations import mixup, random_perspective + +RANK = int(os.getenv('RANK', -1)) + + +def create_dataloader(path, + imgsz, + batch_size, + stride, + single_cls=False, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + workers=8, + image_weights=False, + close_mosaic=False, + quad=False, + prefix='', + shuffle=False, + mask_downsample_ratio=1, + overlap_mask=False): + if rect and shuffle: + LOGGER.warning('WARNING ⚠️ --rect is incompatible with DataLoader shuffle, setting shuffle=False') + shuffle = False + with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP + dataset = LoadImagesAndLabelsAndMasks( + path, + imgsz, + batch_size, + augment=augment, # augmentation + hyp=hyp, # hyperparameters + rect=rect, # rectangular batches + cache_images=cache, + single_cls=single_cls, + stride=int(stride), + pad=pad, + image_weights=image_weights, + prefix=prefix, + downsample_ratio=mask_downsample_ratio, + overlap=overlap_mask) + + batch_size = min(batch_size, len(dataset)) + nd = torch.cuda.device_count() # number of CUDA devices + nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers + sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle) + #loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates + loader = DataLoader if image_weights or close_mosaic else InfiniteDataLoader + generator = torch.Generator() + generator.manual_seed(6148914691236517205 + RANK) + return loader( + dataset, + batch_size=batch_size, + shuffle=shuffle and sampler is None, + num_workers=nw, + sampler=sampler, + pin_memory=True, + collate_fn=LoadImagesAndLabelsAndMasks.collate_fn4 if quad else LoadImagesAndLabelsAndMasks.collate_fn, + worker_init_fn=seed_worker, + generator=generator, + ), dataset + + +class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing + + def __init__( + self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0, + min_items=0, + prefix="", + downsample_ratio=1, + overlap=False, + ): + super().__init__(path, img_size, batch_size, augment, hyp, rect, image_weights, cache_images, single_cls, + stride, pad, min_items, prefix) + self.downsample_ratio = downsample_ratio + self.overlap = overlap + + def __getitem__(self, index): + index = self.indices[index] # linear, shuffled, or image_weights + + hyp = self.hyp + mosaic = self.mosaic and random.random() < hyp['mosaic'] + masks = [] + if mosaic: + # Load mosaic + img, labels, segments = self.load_mosaic(index) + shapes = None + + # MixUp augmentation + if random.random() < hyp["mixup"]: + img, labels, segments = mixup(img, labels, segments, *self.load_mosaic(random.randint(0, self.n - 1))) + + else: + # Load image + img, (h0, w0), (h, w) = self.load_image(index) + + # Letterbox + shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) + shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + + labels = self.labels[index].copy() + # [array, array, ....], array.shape=(num_points, 2), xyxyxyxy + segments = self.segments[index].copy() + if len(segments): + for i_s in range(len(segments)): + segments[i_s] = xyn2xy( + segments[i_s], + ratio[0] * w, + ratio[1] * h, + padw=pad[0], + padh=pad[1], + ) + if labels.size: # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) + + if self.augment: + img, labels, segments = random_perspective(img, + labels, + segments=segments, + degrees=hyp["degrees"], + translate=hyp["translate"], + scale=hyp["scale"], + shear=hyp["shear"], + perspective=hyp["perspective"]) + + nl = len(labels) # number of labels + if nl: + labels[:, 1:5] = xyxy2xywhn(labels[:, 1:5], w=img.shape[1], h=img.shape[0], clip=True, eps=1e-3) + if self.overlap: + masks, sorted_idx = polygons2masks_overlap(img.shape[:2], + segments, + downsample_ratio=self.downsample_ratio) + masks = masks[None] # (640, 640) -> (1, 640, 640) + labels = labels[sorted_idx] + else: + masks = polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio) + + masks = (torch.from_numpy(masks) if len(masks) else torch.zeros(1 if self.overlap else nl, img.shape[0] // + self.downsample_ratio, img.shape[1] // + self.downsample_ratio)) + # TODO: albumentations support + if self.augment: + # Albumentations + # there are some augmentation that won't change boxes and masks, + # so just be it for now. + img, labels = self.albumentations(img, labels) + nl = len(labels) # update after albumentations + + # HSV color-space + augment_hsv(img, hgain=hyp["hsv_h"], sgain=hyp["hsv_s"], vgain=hyp["hsv_v"]) + + # Flip up-down + if random.random() < hyp["flipud"]: + img = np.flipud(img) + if nl: + labels[:, 2] = 1 - labels[:, 2] + masks = torch.flip(masks, dims=[1]) + + # Flip left-right + if random.random() < hyp["fliplr"]: + img = np.fliplr(img) + if nl: + labels[:, 1] = 1 - labels[:, 1] + masks = torch.flip(masks, dims=[2]) + + # Cutouts # labels = cutout(img, labels, p=0.5) + + labels_out = torch.zeros((nl, 6)) + if nl: + labels_out[:, 1:] = torch.from_numpy(labels) + + # Convert + img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB + img = np.ascontiguousarray(img) + + return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks) + + def load_mosaic(self, index): + # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic + labels4, segments4 = [], [] + s = self.img_size + yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y + + # 3 additional image indices + indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + for i, index in enumerate(indices): + # Load image + img, _, (h, w) = self.load_image(index) + + # place img in img4 + if i == 0: # top left + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + elif i == 1: # top right + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc + x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h + elif i == 2: # bottom left + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) + x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) + elif i == 3: # bottom right + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) + x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) + + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + padw = x1a - x1b + padh = y1a - y1b + + labels, segments = self.labels[index].copy(), self.segments[index].copy() + + if labels.size: + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format + segments = [xyn2xy(x, w, h, padw, padh) for x in segments] + labels4.append(labels) + segments4.extend(segments) + + # Concat/clip labels + labels4 = np.concatenate(labels4, 0) + for x in (labels4[:, 1:], *segments4): + np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective() + # img4, labels4 = replicate(img4, labels4) # replicate + + # Augment + img4, labels4, segments4 = copy_paste(img4, labels4, segments4, p=self.hyp["copy_paste"]) + img4, labels4, segments4 = random_perspective(img4, + labels4, + segments4, + degrees=self.hyp["degrees"], + translate=self.hyp["translate"], + scale=self.hyp["scale"], + shear=self.hyp["shear"], + perspective=self.hyp["perspective"], + border=self.mosaic_border) # border to remove + return img4, labels4, segments4 + + @staticmethod + def collate_fn(batch): + img, label, path, shapes, masks = zip(*batch) # transposed + batched_masks = torch.cat(masks, 0) + for i, l in enumerate(label): + l[:, 0] = i # add target image index for build_targets() + return torch.stack(img, 0), torch.cat(label, 0), path, shapes, batched_masks + + +def polygon2mask(img_size, polygons, color=1, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (np.ndarray): [N, M], N is the number of polygons, + M is the number of points(Be divided by 2). + """ + mask = np.zeros(img_size, dtype=np.uint8) + polygons = np.asarray(polygons) + polygons = polygons.astype(np.int32) + shape = polygons.shape + polygons = polygons.reshape(shape[0], -1, 2) + cv2.fillPoly(mask, polygons, color=color) + nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio) + # NOTE: fillPoly firstly then resize is trying the keep the same way + # of loss calculation when mask-ratio=1. + mask = cv2.resize(mask, (nw, nh)) + return mask + + +def polygons2masks(img_size, polygons, color, downsample_ratio=1): + """ + Args: + img_size (tuple): The image size. + polygons (list[np.ndarray]): each polygon is [N, M], + N is the number of polygons, + M is the number of points(Be divided by 2). + """ + masks = [] + for si in range(len(polygons)): + mask = polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio) + masks.append(mask) + return np.array(masks) + + +def polygons2masks_overlap(img_size, segments, downsample_ratio=1): + """Return a (640, 640) overlap mask.""" + masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio), + dtype=np.int32 if len(segments) > 255 else np.uint8) + areas = [] + ms = [] + for si in range(len(segments)): + mask = polygon2mask( + img_size, + [segments[si].reshape(-1)], + downsample_ratio=downsample_ratio, + color=1, + ) + ms.append(mask) + areas.append(mask.sum()) + areas = np.asarray(areas) + index = np.argsort(-areas) + ms = np.array(ms)[index] + for i in range(len(segments)): + mask = ms[i] * (i + 1) + masks = masks + mask + masks = np.clip(masks, a_min=0, a_max=i + 1) + return masks, index diff --git a/utils/segment/general.py b/utils/segment/general.py new file mode 100644 index 0000000000000000000000000000000000000000..b526333dc5a1b8625d7e6a51ee6ba41818c62adb --- /dev/null +++ b/utils/segment/general.py @@ -0,0 +1,137 @@ +import cv2 +import numpy as np +import torch +import torch.nn.functional as F + + +def crop_mask(masks, boxes): + """ + "Crop" predicted masks by zeroing out everything not in the predicted bbox. + Vectorized by Chong (thanks Chong). + + Args: + - masks should be a size [h, w, n] tensor of masks + - boxes should be a size [n, 4] tensor of bbox coords in relative point form + """ + + n, h, w = masks.shape + x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) + r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) + c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1) + + return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) + + +def process_mask_upsample(protos, masks_in, bboxes, shape): + """ + Crop after upsample. + proto_out: [mask_dim, mask_h, mask_w] + out_masks: [n, mask_dim], n is number of masks after nms + bboxes: [n, 4], n is number of masks after nms + shape:input_image_size, (h, w) + + return: h, w, n + """ + + c, mh, mw = protos.shape # CHW + masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) + masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW + masks = crop_mask(masks, bboxes) # CHW + return masks.gt_(0.5) + + +def process_mask(protos, masks_in, bboxes, shape, upsample=False): + """ + Crop before upsample. + proto_out: [mask_dim, mask_h, mask_w] + out_masks: [n, mask_dim], n is number of masks after nms + bboxes: [n, 4], n is number of masks after nms + shape:input_image_size, (h, w) + + return: h, w, n + """ + + c, mh, mw = protos.shape # CHW + ih, iw = shape + masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW + + downsampled_bboxes = bboxes.clone() + downsampled_bboxes[:, 0] *= mw / iw + downsampled_bboxes[:, 2] *= mw / iw + downsampled_bboxes[:, 3] *= mh / ih + downsampled_bboxes[:, 1] *= mh / ih + + masks = crop_mask(masks, downsampled_bboxes) # CHW + if upsample: + masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW + return masks.gt_(0.5) + + +def scale_image(im1_shape, masks, im0_shape, ratio_pad=None): + """ + img1_shape: model input shape, [h, w] + img0_shape: origin pic shape, [h, w, 3] + masks: [h, w, num] + """ + # Rescale coordinates (xyxy) from im1_shape to im0_shape + if ratio_pad is None: # calculate from im0_shape + gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1]) # gain = old / new + pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding + else: + pad = ratio_pad[1] + top, left = int(pad[1]), int(pad[0]) # y, x + bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0]) + + if len(masks.shape) < 2: + raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}') + masks = masks[top:bottom, left:right] + # masks = masks.permute(2, 0, 1).contiguous() + # masks = F.interpolate(masks[None], im0_shape[:2], mode='bilinear', align_corners=False)[0] + # masks = masks.permute(1, 2, 0).contiguous() + masks = cv2.resize(masks, (im0_shape[1], im0_shape[0])) + + if len(masks.shape) == 2: + masks = masks[:, :, None] + return masks + + +def mask_iou(mask1, mask2, eps=1e-7): + """ + mask1: [N, n] m1 means number of predicted objects + mask2: [M, n] m2 means number of gt objects + Note: n means image_w x image_h + + return: masks iou, [N, M] + """ + intersection = torch.matmul(mask1, mask2.t()).clamp(0) + union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection + return intersection / (union + eps) + + +def masks_iou(mask1, mask2, eps=1e-7): + """ + mask1: [N, n] m1 means number of predicted objects + mask2: [N, n] m2 means number of gt objects + Note: n means image_w x image_h + + return: masks iou, (N, ) + """ + intersection = (mask1 * mask2).sum(1).clamp(0) # (N, ) + union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection + return intersection / (union + eps) + + +def masks2segments(masks, strategy='largest'): + # Convert masks(n,160,160) into segments(n,xy) + segments = [] + for x in masks.int().cpu().numpy().astype('uint8'): + c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] + if c: + if strategy == 'concat': # concatenate all segments + c = np.concatenate([x.reshape(-1, 2) for x in c]) + elif strategy == 'largest': # select largest segment + c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2) + else: + c = np.zeros((0, 2)) # no segments found + segments.append(c.astype('float32')) + return segments diff --git a/utils/segment/loss.py b/utils/segment/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..b45b2c27e0a05c275cbc50064288aece3ae3e856 --- /dev/null +++ b/utils/segment/loss.py @@ -0,0 +1,186 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..general import xywh2xyxy +from ..loss import FocalLoss, smooth_BCE +from ..metrics import bbox_iou +from ..torch_utils import de_parallel +from .general import crop_mask + + +class ComputeLoss: + # Compute losses + def __init__(self, model, autobalance=False, overlap=False): + self.sort_obj_iou = False + self.overlap = overlap + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + self.device = device + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) + BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets + + # Focal loss + g = h['fl_gamma'] # focal loss gamma + if g > 0: + BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index + self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance + self.na = m.na # number of anchors + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.nm = m.nm # number of masks + self.anchors = m.anchors + self.device = device + + def __call__(self, preds, targets, masks): # predictions, targets, model + p, proto = preds + bs, nm, mask_h, mask_w = proto.shape # batch size, number of masks, mask height, mask width + lcls = torch.zeros(1, device=self.device) + lbox = torch.zeros(1, device=self.device) + lobj = torch.zeros(1, device=self.device) + lseg = torch.zeros(1, device=self.device) + tcls, tbox, indices, anchors, tidxs, xywhn = self.build_targets(p, targets) # targets + + # Losses + for i, pi in enumerate(p): # layer index, layer predictions + b, a, gj, gi = indices[i] # image, anchor, gridy, gridx + tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj + + n = b.shape[0] # number of targets + if n: + pxy, pwh, _, pcls, pmask = pi[b, a, gj, gi].split((2, 2, 1, self.nc, nm), 1) # subset of predictions + + # Box regression + pxy = pxy.sigmoid() * 2 - 0.5 + pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i] + pbox = torch.cat((pxy, pwh), 1) # predicted box + iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target) + lbox += (1.0 - iou).mean() # iou loss + + # Objectness + iou = iou.detach().clamp(0).type(tobj.dtype) + if self.sort_obj_iou: + j = iou.argsort() + b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j] + if self.gr < 1: + iou = (1.0 - self.gr) + self.gr * iou + tobj[b, a, gj, gi] = iou # iou ratio + + # Classification + if self.nc > 1: # cls loss (only if multiple classes) + t = torch.full_like(pcls, self.cn, device=self.device) # targets + t[range(n), tcls[i]] = self.cp + lcls += self.BCEcls(pcls, t) # BCE + + # Mask regression + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + masks = F.interpolate(masks[None], (mask_h, mask_w), mode="nearest")[0] + marea = xywhn[i][:, 2:].prod(1) # mask width, height normalized + mxyxy = xywh2xyxy(xywhn[i] * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device)) + for bi in b.unique(): + j = b == bi # matching index + if self.overlap: + mask_gti = torch.where(masks[bi][None] == tidxs[i][j].view(-1, 1, 1), 1.0, 0.0) + else: + mask_gti = masks[tidxs[i]][j] + lseg += self.single_mask_loss(mask_gti, pmask[j], proto[bi], mxyxy[j], marea[j]) + + obji = self.BCEobj(pi[..., 4], tobj) + lobj += obji * self.balance[i] # obj loss + if self.autobalance: + self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + + if self.autobalance: + self.balance = [x / self.balance[self.ssi] for x in self.balance] + lbox *= self.hyp["box"] + lobj *= self.hyp["obj"] + lcls *= self.hyp["cls"] + lseg *= self.hyp["box"] / bs + + loss = lbox + lobj + lcls + lseg + return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach() + + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): + # Mask loss for one image + pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80) + loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none") + return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() + + def build_targets(self, p, targets): + # Build targets for compute_loss(), input targets(image,class,x,y,w,h) + na, nt = self.na, targets.shape[0] # number of anchors, targets + tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], [] + gain = torch.ones(8, device=self.device) # normalized to gridspace gain + ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) + if self.overlap: + batch = p[0].shape[0] + ti = [] + for i in range(batch): + num = (targets[:, 0] == i).sum() # find number of targets of each image + ti.append(torch.arange(num, device=self.device).float().view(1, num).repeat(na, 1) + 1) # (na, num) + ti = torch.cat(ti, 1) # (na, nt) + else: + ti = torch.arange(nt, device=self.device).float().view(1, nt).repeat(na, 1) + targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None], ti[..., None]), 2) # append anchor indices + + g = 0.5 # bias + off = torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=self.device).float() * g # offsets + + for i in range(self.nl): + anchors, shape = self.anchors[i], p[i].shape + gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain + + # Match targets to anchors + t = targets * gain # shape(3,n,7) + if nt: + # Matches + r = t[..., 4:6] / anchors[:, None] # wh ratio + j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t'] # compare + # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) + t = t[j] # filter + + # Offsets + gxy = t[:, 2:4] # grid xy + gxi = gain[[2, 3]] - gxy # inverse + j, k = ((gxy % 1 < g) & (gxy > 1)).T + l, m = ((gxi % 1 < g) & (gxi > 1)).T + j = torch.stack((torch.ones_like(j), j, k, l, m)) + t = t.repeat((5, 1, 1))[j] + offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] + else: + t = targets[0] + offsets = 0 + + # Define + bc, gxy, gwh, at = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors + (a, tidx), (b, c) = at.long().T, bc.long().T # anchors, image, class + gij = (gxy - offsets).long() + gi, gj = gij.T # grid indices + + # Append + indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid + tbox.append(torch.cat((gxy - gij, gwh), 1)) # box + anch.append(anchors[a]) # anchors + tcls.append(c) # class + tidxs.append(tidx) + xywhn.append(torch.cat((gxy, gwh), 1) / gain[2:6]) # xywh normalized + + return tcls, tbox, indices, anch, tidxs, xywhn diff --git a/utils/segment/loss_tal.py b/utils/segment/loss_tal.py new file mode 100644 index 0000000000000000000000000000000000000000..3f90b27ef7c25df65e072f1d26aaaa4305e83460 --- /dev/null +++ b/utils/segment/loss_tal.py @@ -0,0 +1,261 @@ +import os + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from torchvision.ops import sigmoid_focal_loss + +from utils.general import xywh2xyxy, xyxy2xywh +from utils.metrics import bbox_iou +from utils.segment.tal.anchor_generator import dist2bbox, make_anchors, bbox2dist +from utils.segment.tal.assigner import TaskAlignedAssigner +from utils.torch_utils import de_parallel +from utils.segment.general import crop_mask + + +def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 + # return positive, negative label smoothing BCE targets + return 1.0 - 0.5 * eps, 0.5 * eps + + +class VarifocalLoss(nn.Module): + # Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367 + def __init__(self): + super().__init__() + + def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0): + weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label + with torch.cuda.amp.autocast(enabled=False): + loss = (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), + reduction="none") * weight).sum() + return loss + + +class FocalLoss(nn.Module): + # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): + super().__init__() + self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() + self.gamma = gamma + self.alpha = alpha + self.reduction = loss_fcn.reduction + self.loss_fcn.reduction = "none" # required to apply FL to each element + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + # p_t = torch.exp(-loss) + # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability + + # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py + pred_prob = torch.sigmoid(pred) # prob from logits + p_t = true * pred_prob + (1 - true) * (1 - pred_prob) + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = (1.0 - p_t) ** self.gamma + loss *= alpha_factor * modulating_factor + + if self.reduction == "mean": + return loss.mean() + elif self.reduction == "sum": + return loss.sum() + else: # 'none' + return loss + + +class BboxLoss(nn.Module): + def __init__(self, reg_max, use_dfl=False): + super().__init__() + self.reg_max = reg_max + self.use_dfl = use_dfl + + def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask): + # iou loss + bbox_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4]) # (b, h*w, 4) + pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).view(-1, 4) + target_bboxes_pos = torch.masked_select(target_bboxes, bbox_mask).view(-1, 4) + bbox_weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1) + + iou = bbox_iou(pred_bboxes_pos, target_bboxes_pos, xywh=False, CIoU=True) + loss_iou = 1.0 - iou + + loss_iou *= bbox_weight + loss_iou = loss_iou.sum() / target_scores_sum + + # dfl loss + if self.use_dfl: + dist_mask = fg_mask.unsqueeze(-1).repeat([1, 1, (self.reg_max + 1) * 4]) + pred_dist_pos = torch.masked_select(pred_dist, dist_mask).view(-1, 4, self.reg_max + 1) + target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max) + target_ltrb_pos = torch.masked_select(target_ltrb, bbox_mask).view(-1, 4) + loss_dfl = self._df_loss(pred_dist_pos, target_ltrb_pos) * bbox_weight + loss_dfl = loss_dfl.sum() / target_scores_sum + else: + loss_dfl = torch.tensor(0.0).to(pred_dist.device) + + return loss_iou, loss_dfl, iou + + def _df_loss(self, pred_dist, target): + target_left = target.to(torch.long) + target_right = target_left + 1 + weight_left = target_right.to(torch.float) - target + weight_right = 1 - weight_left + loss_left = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_left.view(-1), reduction="none").view( + target_left.shape) * weight_left + loss_right = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_right.view(-1), + reduction="none").view(target_left.shape) * weight_right + return (loss_left + loss_right).mean(-1, keepdim=True) + + +class ComputeLoss: + # Compute losses + def __init__(self, model, use_dfl=True, overlap=True): + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none') + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets + + # Focal loss + g = h["fl_gamma"] # focal loss gamma + if g > 0: + BCEcls = FocalLoss(BCEcls, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.BCEcls = BCEcls + self.hyp = h + self.stride = m.stride # model strides + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.no = m.no + self.nm = m.nm + self.overlap = overlap + self.reg_max = m.reg_max + self.device = device + + self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.proj = torch.arange(m.reg_max).float().to(device) # / 120.0 + self.use_dfl = use_dfl + + def preprocess(self, targets, batch_size, scale_tensor): + if targets.shape[0] == 0: + out = torch.zeros(batch_size, 0, 5, device=self.device) + else: + i = targets[:, 0] # image index + _, counts = i.unique(return_counts=True) + out = torch.zeros(batch_size, counts.max(), 5, device=self.device) + for j in range(batch_size): + matches = i == j + n = matches.sum() + if n: + out[j, :n] = targets[matches, 1:] + out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) + return out + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) + return dist2bbox(pred_dist, anchor_points, xywh=False) + + def __call__(self, p, targets, masks, img=None, epoch=0): + loss = torch.zeros(4, device=self.device) # box, cls, dfl + feats, pred_masks, proto = p if len(p) == 3 else p[1] + batch_size, _, mask_h, mask_w = proto.shape + pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + pred_masks = pred_masks.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + batch_size, grid_size = pred_scores.shape[:2] + imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # targets + try: + batch_idx = targets[:, 0].view(-1, 1) + targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + except RuntimeError as e: + raise TypeError('ERROR.') from e + + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + + target_labels, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( + pred_scores.detach().sigmoid(), + (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + + target_scores_sum = target_scores.sum() + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[2] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + + # bbox loss + if fg_mask.sum(): + loss[0], loss[3], _ = self.bbox_loss(pred_distri, + pred_bboxes, + anchor_points, + target_bboxes / stride_tensor, + target_scores, + target_scores_sum, + fg_mask) + + # masks loss + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] + + for i in range(batch_size): + if fg_mask[i].sum(): + mask_idx = target_gt_idx[i][fg_mask[i]] + if self.overlap: + gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0) + else: + gt_mask = masks[batch_idx.view(-1) == i][mask_idx] + xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]] + marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) + mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device) + loss[1] += self.single_mask_loss(gt_mask, pred_masks[i][fg_mask[i]], proto[i], mxyxy, + marea) # seg loss + + loss[0] *= 7.5 # box gain + loss[1] *= 2.5 / batch_size + loss[2] *= 0.5 # cls gain + loss[3] *= 1.5 # dfl gain + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) + + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): + # Mask loss for one image + pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n, 32) @ (32,80,80) -> (n,80,80) + loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none') + #loss = sigmoid_focal_loss(pred_mask, gt_mask, alpha = .25, gamma = 2., reduction = 'none') + + return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() + + #p_m = torch.flatten(pred_mask.sigmoid()) + #p_m = torch.flatten(pred_mask.softmax(dim = 1)) + #g_m = torch.flatten(gt_mask) + #i_m = torch.sum(torch.mul(p_m, g_m)) + #u_m = torch.sum(torch.add(p_m, g_m)) + #d_c = (2. * i_m + 1.) / (u_m + 1.) + #d_l = (1. - d_c) + #return d_l diff --git a/utils/segment/loss_tal_dual.py b/utils/segment/loss_tal_dual.py new file mode 100644 index 0000000000000000000000000000000000000000..87bb8ebfb3008ec4dc37b981e8fa559a7e90b68d --- /dev/null +++ b/utils/segment/loss_tal_dual.py @@ -0,0 +1,727 @@ +import os + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from torchvision.ops import sigmoid_focal_loss + +from utils.general import xywh2xyxy, xyxy2xywh +from utils.metrics import bbox_iou +from utils.segment.tal.anchor_generator import dist2bbox, make_anchors, bbox2dist +from utils.segment.tal.assigner import TaskAlignedAssigner +from utils.torch_utils import de_parallel +from utils.segment.general import crop_mask + + +def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 + # return positive, negative label smoothing BCE targets + return 1.0 - 0.5 * eps, 0.5 * eps + + +class VarifocalLoss(nn.Module): + # Varifocal loss by Zhang et al. https://arxiv.org/abs/2008.13367 + def __init__(self): + super().__init__() + + def forward(self, pred_score, gt_score, label, alpha=0.75, gamma=2.0): + weight = alpha * pred_score.sigmoid().pow(gamma) * (1 - label) + gt_score * label + with torch.cuda.amp.autocast(enabled=False): + loss = (F.binary_cross_entropy_with_logits(pred_score.float(), gt_score.float(), + reduction="none") * weight).sum() + return loss + + +class FocalLoss(nn.Module): + # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) + def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): + super().__init__() + self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() + self.gamma = gamma + self.alpha = alpha + self.reduction = loss_fcn.reduction + self.loss_fcn.reduction = "none" # required to apply FL to each element + + def forward(self, pred, true): + loss = self.loss_fcn(pred, true) + # p_t = torch.exp(-loss) + # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability + + # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py + pred_prob = torch.sigmoid(pred) # prob from logits + p_t = true * pred_prob + (1 - true) * (1 - pred_prob) + alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) + modulating_factor = (1.0 - p_t) ** self.gamma + loss *= alpha_factor * modulating_factor + + if self.reduction == "mean": + return loss.mean() + elif self.reduction == "sum": + return loss.sum() + else: # 'none' + return loss + + +class BboxLoss(nn.Module): + def __init__(self, reg_max, use_dfl=False): + super().__init__() + self.reg_max = reg_max + self.use_dfl = use_dfl + + def forward(self, pred_dist, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask): + # iou loss + bbox_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4]) # (b, h*w, 4) + pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).view(-1, 4) + target_bboxes_pos = torch.masked_select(target_bboxes, bbox_mask).view(-1, 4) + bbox_weight = torch.masked_select(target_scores.sum(-1), fg_mask).unsqueeze(-1) + + iou = bbox_iou(pred_bboxes_pos, target_bboxes_pos, xywh=False, CIoU=True) + loss_iou = 1.0 - iou + + loss_iou *= bbox_weight + loss_iou = loss_iou.sum() / target_scores_sum + + # dfl loss + if self.use_dfl: + dist_mask = fg_mask.unsqueeze(-1).repeat([1, 1, (self.reg_max + 1) * 4]) + pred_dist_pos = torch.masked_select(pred_dist, dist_mask).view(-1, 4, self.reg_max + 1) + target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max) + target_ltrb_pos = torch.masked_select(target_ltrb, bbox_mask).view(-1, 4) + loss_dfl = self._df_loss(pred_dist_pos, target_ltrb_pos) * bbox_weight + loss_dfl = loss_dfl.sum() / target_scores_sum + else: + loss_dfl = torch.tensor(0.0).to(pred_dist.device) + + return loss_iou, loss_dfl, iou + + def _df_loss(self, pred_dist, target): + target_left = target.to(torch.long) + target_right = target_left + 1 + weight_left = target_right.to(torch.float) - target + weight_right = 1 - weight_left + loss_left = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_left.view(-1), reduction="none").view( + target_left.shape) * weight_left + loss_right = F.cross_entropy(pred_dist.view(-1, self.reg_max + 1), target_right.view(-1), + reduction="none").view(target_left.shape) * weight_right + return (loss_left + loss_right).mean(-1, keepdim=True) + + +class ComputeLoss: + # Compute losses + def __init__(self, model, use_dfl=True, overlap=True): + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none') + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets + + # Focal loss + g = h["fl_gamma"] # focal loss gamma + if g > 0: + BCEcls = FocalLoss(BCEcls, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.BCEcls = BCEcls + self.hyp = h + self.stride = m.stride # model strides + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.no = m.no + self.nm = m.nm + self.overlap = overlap + self.reg_max = m.reg_max + self.device = device + + self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.assigner2 = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.bbox_loss2 = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.proj = torch.arange(m.reg_max).float().to(device) # / 120.0 + self.use_dfl = use_dfl + + def preprocess(self, targets, batch_size, scale_tensor): + if targets.shape[0] == 0: + out = torch.zeros(batch_size, 0, 5, device=self.device) + else: + i = targets[:, 0] # image index + _, counts = i.unique(return_counts=True) + out = torch.zeros(batch_size, counts.max(), 5, device=self.device) + for j in range(batch_size): + matches = i == j + n = matches.sum() + if n: + out[j, :n] = targets[matches, 1:] + out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) + return out + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) + return dist2bbox(pred_dist, anchor_points, xywh=False) + + def __call__(self, p, targets, masks, img=None, epoch=0): + loss = torch.zeros(4, device=self.device) # box, cls, dfl + + feats_, pred_masks_, proto_ = p if len(p) == 3 else p[1] + + feats, pred_masks, proto = feats_[0], pred_masks_[0], proto_[0] + feats2, pred_masks2, proto2 = feats_[1], pred_masks_[1], proto_[1] + + batch_size, _, mask_h, mask_w = proto.shape + + pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + pred_masks = pred_masks.permute(0, 2, 1).contiguous() + + pred_distri2, pred_scores2 = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats2], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores2 = pred_scores2.permute(0, 2, 1).contiguous() + pred_distri2 = pred_distri2.permute(0, 2, 1).contiguous() + pred_masks2 = pred_masks2.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + batch_size, grid_size = pred_scores.shape[:2] + imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # targets + try: + batch_idx = targets[:, 0].view(-1, 1) + targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + except RuntimeError as e: + raise TypeError('ERROR.') from e + + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + + pred_bboxes2 = self.bbox_decode(anchor_points, pred_distri2) # xyxy, (b, h*w, 4) + + target_labels, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( + pred_scores.detach().sigmoid(), + (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + + target_labels2, target_bboxes2, target_scores2, fg_mask2, target_gt_idx2 = self.assigner2( + pred_scores2.detach().sigmoid(), + (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + + target_scores_sum = target_scores.sum() + + target_scores_sum2 = target_scores2.sum() + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[2] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + loss[2] *= 0.25 + loss[2] += self.BCEcls(pred_scores2, target_scores2.to(dtype)).sum() / target_scores_sum2 # BCE + + # bbox loss + if fg_mask.sum(): + loss[0], loss[3], _ = self.bbox_loss(pred_distri, + pred_bboxes, + anchor_points, + target_bboxes / stride_tensor, + target_scores, + target_scores_sum, + fg_mask) + + # masks loss + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] + + for i in range(batch_size): + if fg_mask[i].sum(): + mask_idx = target_gt_idx[i][fg_mask[i]] + if self.overlap: + gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0) + else: + gt_mask = masks[batch_idx.view(-1) == i][mask_idx] + xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]] + marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) + mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device) + loss[1] += self.single_mask_loss(gt_mask, pred_masks[i][fg_mask[i]], proto[i], mxyxy, + marea) # seg loss + + loss[0] *= 0.25 + loss[3] *= 0.25 + loss[1] *= 0.25 + + # bbox loss + if fg_mask2.sum(): + loss0_, loss3_, _ = self.bbox_loss2(pred_distri2, + pred_bboxes2, + anchor_points, + target_bboxes2 / stride_tensor, + target_scores2, + target_scores_sum2, + fg_mask2) + + # masks loss + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] + + for i in range(batch_size): + if fg_mask2[i].sum(): + mask_idx = target_gt_idx2[i][fg_mask2[i]] + if self.overlap: + gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0) + else: + gt_mask = masks[batch_idx.view(-1) == i][mask_idx] + xyxyn = target_bboxes2[i][fg_mask2[i]] / imgsz[[1, 0, 1, 0]] + marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) + mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device) + loss[1] += self.single_mask_loss(gt_mask, pred_masks2[i][fg_mask2[i]], proto2[i], mxyxy, + marea) # seg loss + + loss[0] += loss0_ + loss[3] += loss3_ + + loss[0] *= 7.5 # box gain + loss[1] *= 2.5 / batch_size + loss[2] *= 0.5 # cls gain + loss[3] *= 1.5 # dfl gain + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) + + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): + # Mask loss for one image + pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n, 32) @ (32,80,80) -> (n,80,80) + loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none') + #loss = sigmoid_focal_loss(pred_mask, gt_mask, alpha = .25, gamma = 2., reduction = 'none') + + #p_m = torch.flatten(pred_mask.softmax(dim = 1)) + #g_m = torch.flatten(gt_mask) + #i_m = torch.sum(torch.mul(p_m, g_m)) + #u_m = torch.sum(torch.add(p_m, g_m)) + #dice_coef = (2. * i_m + 1.) / (u_m + 1.) + #dice_loss = (1. - dice_coef) + return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() + + +class ComputeLossLH: + # Compute losses + def __init__(self, model, use_dfl=True, overlap=True): + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none') + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets + + # Focal loss + g = h["fl_gamma"] # focal loss gamma + if g > 0: + BCEcls = FocalLoss(BCEcls, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.BCEcls = BCEcls + self.hyp = h + self.stride = m.stride # model strides + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.no = m.no + self.nm = m.nm + self.overlap = overlap + self.reg_max = m.reg_max + self.device = device + + self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.proj = torch.arange(m.reg_max).float().to(device) # / 120.0 + self.use_dfl = use_dfl + + def preprocess(self, targets, batch_size, scale_tensor): + if targets.shape[0] == 0: + out = torch.zeros(batch_size, 0, 5, device=self.device) + else: + i = targets[:, 0] # image index + _, counts = i.unique(return_counts=True) + out = torch.zeros(batch_size, counts.max(), 5, device=self.device) + for j in range(batch_size): + matches = i == j + n = matches.sum() + if n: + out[j, :n] = targets[matches, 1:] + out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) + return out + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) + return dist2bbox(pred_dist, anchor_points, xywh=False) + + def __call__(self, p, targets, masks, img=None, epoch=0): + loss = torch.zeros(4, device=self.device) # box, cls, dfl + + feats_, pred_masks_, proto_ = p if len(p) == 3 else p[1] + + feats, pred_masks, proto = feats_[0], pred_masks_[0], proto_[0] + feats2, pred_masks2, proto2 = feats_[1], pred_masks_[1], proto_[1] + + batch_size, _, mask_h, mask_w = proto.shape + + pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + pred_masks = pred_masks.permute(0, 2, 1).contiguous() + + pred_distri2, pred_scores2 = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats2], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores2 = pred_scores2.permute(0, 2, 1).contiguous() + pred_distri2 = pred_distri2.permute(0, 2, 1).contiguous() + pred_masks2 = pred_masks2.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + batch_size, grid_size = pred_scores.shape[:2] + imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # targets + try: + batch_idx = targets[:, 0].view(-1, 1) + targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + except RuntimeError as e: + raise TypeError('ERROR.') from e + + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + + pred_bboxes2 = self.bbox_decode(anchor_points, pred_distri2) # xyxy, (b, h*w, 4) + + target_labels, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( + pred_scores2.detach().sigmoid(), + (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + + target_scores_sum = target_scores.sum() + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[2] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + loss[2] *= 0.25 + loss[2] += self.BCEcls(pred_scores2, target_scores.to(dtype)).sum() / target_scores_sum # BCE + + # bbox loss + if fg_mask.sum(): + loss[0], loss[3], _ = self.bbox_loss(pred_distri, + pred_bboxes, + anchor_points, + target_bboxes / stride_tensor, + target_scores, + target_scores_sum, + fg_mask) + + # masks loss + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] + + for i in range(batch_size): + if fg_mask[i].sum(): + mask_idx = target_gt_idx[i][fg_mask[i]] + if self.overlap: + gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0) + else: + gt_mask = masks[batch_idx.view(-1) == i][mask_idx] + xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]] + marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) + mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device) + loss[1] += self.single_mask_loss(gt_mask, pred_masks[i][fg_mask[i]], proto[i], mxyxy, + marea) # seg loss + + loss[0] *= 0.25 + loss[3] *= 0.25 + loss[1] *= 0.25 + + # bbox loss + if fg_mask.sum(): + loss0_, loss3_, _ = self.bbox_loss(pred_distri2, + pred_bboxes2, + anchor_points, + target_bboxes / stride_tensor, + target_scores, + target_scores_sum, + fg_mask) + + # masks loss + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] + + for i in range(batch_size): + if fg_mask[i].sum(): + mask_idx = target_gt_idx[i][fg_mask[i]] + if self.overlap: + gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0) + else: + gt_mask = masks[batch_idx.view(-1) == i][mask_idx] + xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]] + marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) + mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device) + loss[1] += self.single_mask_loss(gt_mask, pred_masks2[i][fg_mask[i]], proto2[i], mxyxy, + marea) # seg loss + + loss[0] += loss0_ + loss[3] += loss3_ + + loss[0] *= 7.5 # box gain + loss[1] *= 2.5 / batch_size + loss[2] *= 0.5 # cls gain + loss[3] *= 1.5 # dfl gain + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) + + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): + # Mask loss for one image + pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n, 32) @ (32,80,80) -> (n,80,80) + loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none') + #loss = sigmoid_focal_loss(pred_mask, gt_mask, alpha = .25, gamma = 2., reduction = 'none') + + #p_m = torch.flatten(pred_mask.softmax(dim = 1)) + #g_m = torch.flatten(gt_mask) + #i_m = torch.sum(torch.mul(p_m, g_m)) + #u_m = torch.sum(torch.add(p_m, g_m)) + #dice_coef = (2. * i_m + 1.) / (u_m + 1.) + #dice_loss = (1. - dice_coef) + return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() + + +class ComputeLossLH0: + # Compute losses + def __init__(self, model, use_dfl=True, overlap=True): + device = next(model.parameters()).device # get model device + h = model.hyp # hyperparameters + + # Define criteria + BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device), reduction='none') + + # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 + self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets + + # Focal loss + g = h["fl_gamma"] # focal loss gamma + if g > 0: + BCEcls = FocalLoss(BCEcls, g) + + m = de_parallel(model).model[-1] # Detect() module + self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7 + self.BCEcls = BCEcls + self.hyp = h + self.stride = m.stride # model strides + self.nc = m.nc # number of classes + self.nl = m.nl # number of layers + self.no = m.no + self.nm = m.nm + self.overlap = overlap + self.reg_max = m.reg_max + self.device = device + + self.assigner = TaskAlignedAssigner(topk=int(os.getenv('YOLOM', 10)), + num_classes=self.nc, + alpha=float(os.getenv('YOLOA', 0.5)), + beta=float(os.getenv('YOLOB', 6.0))) + self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=use_dfl).to(device) + self.proj = torch.arange(m.reg_max).float().to(device) # / 120.0 + self.use_dfl = use_dfl + + def preprocess(self, targets, batch_size, scale_tensor): + if targets.shape[0] == 0: + out = torch.zeros(batch_size, 0, 5, device=self.device) + else: + i = targets[:, 0] # image index + _, counts = i.unique(return_counts=True) + out = torch.zeros(batch_size, counts.max(), 5, device=self.device) + for j in range(batch_size): + matches = i == j + n = matches.sum() + if n: + out[j, :n] = targets[matches, 1:] + out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor)) + return out + + def bbox_decode(self, anchor_points, pred_dist): + if self.use_dfl: + b, a, c = pred_dist.shape # batch, anchors, channels + pred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype)) + # pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2) + return dist2bbox(pred_dist, anchor_points, xywh=False) + + def __call__(self, p, targets, masks, img=None, epoch=0): + loss = torch.zeros(4, device=self.device) # box, cls, dfl + + feats_, pred_masks_, proto_ = p if len(p) == 3 else p[1] + + feats, pred_masks, proto = feats_[0], pred_masks_[0], proto_[0] + feats2, pred_masks2, proto2 = feats_[1], pred_masks_[1], proto_[1] + + batch_size, _, mask_h, mask_w = proto.shape + + pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores = pred_scores.permute(0, 2, 1).contiguous() + pred_distri = pred_distri.permute(0, 2, 1).contiguous() + pred_masks = pred_masks.permute(0, 2, 1).contiguous() + + pred_distri2, pred_scores2 = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats2], 2).split( + (self.reg_max * 4, self.nc), 1) + pred_scores2 = pred_scores2.permute(0, 2, 1).contiguous() + pred_distri2 = pred_distri2.permute(0, 2, 1).contiguous() + pred_masks2 = pred_masks2.permute(0, 2, 1).contiguous() + + dtype = pred_scores.dtype + batch_size, grid_size = pred_scores.shape[:2] + imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0] # image size (h,w) + anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5) + + # targets + try: + batch_idx = targets[:, 0].view(-1, 1) + targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]) + gt_labels, gt_bboxes = targets.split((1, 4), 2) # cls, xyxy + mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0) + except RuntimeError as e: + raise TypeError('ERROR.') from e + + + # pboxes + pred_bboxes = self.bbox_decode(anchor_points, pred_distri) # xyxy, (b, h*w, 4) + + pred_bboxes2 = self.bbox_decode(anchor_points, pred_distri2) # xyxy, (b, h*w, 4) + + target_labels, target_bboxes, target_scores, fg_mask, target_gt_idx = self.assigner( + pred_scores2.detach().sigmoid(), + (pred_bboxes2.detach() * stride_tensor).type(gt_bboxes.dtype), + anchor_points * stride_tensor, + gt_labels, + gt_bboxes, + mask_gt) + + target_scores_sum = target_scores.sum() + + # cls loss + # loss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum # VFL way + loss[2] = self.BCEcls(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum # BCE + loss[2] *= 0.25 + loss[2] += self.BCEcls(pred_scores2, target_scores.to(dtype)).sum() / target_scores_sum # BCE + + # bbox loss + if fg_mask.sum(): + loss[0], loss[3], _ = self.bbox_loss(pred_distri, + pred_bboxes, + anchor_points, + target_bboxes / stride_tensor, + target_scores, + target_scores_sum, + fg_mask) + + # masks loss + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] + + for i in range(batch_size): + if fg_mask[i].sum(): + mask_idx = target_gt_idx[i][fg_mask[i]] + if self.overlap: + gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0) + else: + gt_mask = masks[batch_idx.view(-1) == i][mask_idx] + xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]] + marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) + mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device) + loss[1] += self.single_mask_loss(gt_mask, pred_masks[i][fg_mask[i]], proto[i], mxyxy, + marea) # seg loss + + loss[0] *= 0.25 + loss[3] *= 0.25 + loss[1] *= 0.25 + + # bbox loss + if fg_mask.sum(): + loss0_, loss3_, _ = self.bbox_loss(pred_distri2, + pred_bboxes2, + anchor_points, + target_bboxes / stride_tensor, + target_scores, + target_scores_sum, + fg_mask) + + # masks loss + if tuple(masks.shape[-2:]) != (mask_h, mask_w): # downsample + masks = F.interpolate(masks[None], (mask_h, mask_w), mode='nearest')[0] + + for i in range(batch_size): + if fg_mask[i].sum(): + mask_idx = target_gt_idx[i][fg_mask[i]] + if self.overlap: + gt_mask = torch.where(masks[[i]] == (mask_idx + 1).view(-1, 1, 1), 1.0, 0.0) + else: + gt_mask = masks[batch_idx.view(-1) == i][mask_idx] + xyxyn = target_bboxes[i][fg_mask[i]] / imgsz[[1, 0, 1, 0]] + marea = xyxy2xywh(xyxyn)[:, 2:].prod(1) + mxyxy = xyxyn * torch.tensor([mask_w, mask_h, mask_w, mask_h], device=self.device) + loss[1] += 0. * self.single_mask_loss(gt_mask, pred_masks2[i][fg_mask[i]], proto2[i], mxyxy, + marea) # seg loss + + loss[0] += loss0_ + loss[3] += loss3_ + + loss[0] *= 7.5 # box gain + loss[1] *= 2.5 / batch_size + loss[2] *= 0.5 # cls gain + loss[3] *= 1.5 # dfl gain + + return loss.sum() * batch_size, loss.detach() # loss(box, cls, dfl) + + def single_mask_loss(self, gt_mask, pred, proto, xyxy, area): + # Mask loss for one image + pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n, 32) @ (32,80,80) -> (n,80,80) + loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction='none') + #loss = sigmoid_focal_loss(pred_mask, gt_mask, alpha = .25, gamma = 2., reduction = 'none') + + #p_m = torch.flatten(pred_mask.softmax(dim = 1)) + #g_m = torch.flatten(gt_mask) + #i_m = torch.sum(torch.mul(p_m, g_m)) + #u_m = torch.sum(torch.add(p_m, g_m)) + #dice_coef = (2. * i_m + 1.) / (u_m + 1.) + #dice_loss = (1. - dice_coef) + return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean() diff --git a/utils/segment/metrics.py b/utils/segment/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..e6e5a0ad37ef3dad84cb2a247271efcaee752f11 --- /dev/null +++ b/utils/segment/metrics.py @@ -0,0 +1,205 @@ +import numpy as np + +from ..metrics import ap_per_class + + +def fitness(x): + # Model fitness as a weighted combination of metrics + w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9] + return (x[:, :8] * w).sum(1) + + +def ap_per_class_box_and_mask( + tp_m, + tp_b, + conf, + pred_cls, + target_cls, + plot=False, + save_dir=".", + names=(), +): + """ + Args: + tp_b: tp of boxes. + tp_m: tp of masks. + other arguments see `func: ap_per_class`. + """ + results_boxes = ap_per_class(tp_b, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Box")[2:] + results_masks = ap_per_class(tp_m, + conf, + pred_cls, + target_cls, + plot=plot, + save_dir=save_dir, + names=names, + prefix="Mask")[2:] + + results = { + "boxes": { + "p": results_boxes[0], + "r": results_boxes[1], + "ap": results_boxes[3], + "f1": results_boxes[2], + "ap_class": results_boxes[4]}, + "masks": { + "p": results_masks[0], + "r": results_masks[1], + "ap": results_masks[3], + "f1": results_masks[2], + "ap_class": results_masks[4]}} + return results + + +class Metric: + + def __init__(self) -> None: + self.p = [] # (nc, ) + self.r = [] # (nc, ) + self.f1 = [] # (nc, ) + self.all_ap = [] # (nc, 10) + self.ap_class_index = [] # (nc, ) + + @property + def ap50(self): + """AP@0.5 of all classes. + Return: + (nc, ) or []. + """ + return self.all_ap[:, 0] if len(self.all_ap) else [] + + @property + def ap(self): + """AP@0.5:0.95 + Return: + (nc, ) or []. + """ + return self.all_ap.mean(1) if len(self.all_ap) else [] + + @property + def mp(self): + """mean precision of all classes. + Return: + float. + """ + return self.p.mean() if len(self.p) else 0.0 + + @property + def mr(self): + """mean recall of all classes. + Return: + float. + """ + return self.r.mean() if len(self.r) else 0.0 + + @property + def map50(self): + """Mean AP@0.5 of all classes. + Return: + float. + """ + return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0 + + @property + def map(self): + """Mean AP@0.5:0.95 of all classes. + Return: + float. + """ + return self.all_ap.mean() if len(self.all_ap) else 0.0 + + def mean_results(self): + """Mean of results, return mp, mr, map50, map""" + return (self.mp, self.mr, self.map50, self.map) + + def class_result(self, i): + """class-aware result, return p[i], r[i], ap50[i], ap[i]""" + return (self.p[i], self.r[i], self.ap50[i], self.ap[i]) + + def get_maps(self, nc): + maps = np.zeros(nc) + self.map + for i, c in enumerate(self.ap_class_index): + maps[c] = self.ap[i] + return maps + + def update(self, results): + """ + Args: + results: tuple(p, r, ap, f1, ap_class) + """ + p, r, all_ap, f1, ap_class_index = results + self.p = p + self.r = r + self.all_ap = all_ap + self.f1 = f1 + self.ap_class_index = ap_class_index + + +class Metrics: + """Metric for boxes and masks.""" + + def __init__(self) -> None: + self.metric_box = Metric() + self.metric_mask = Metric() + + def update(self, results): + """ + Args: + results: Dict{'boxes': Dict{}, 'masks': Dict{}} + """ + self.metric_box.update(list(results["boxes"].values())) + self.metric_mask.update(list(results["masks"].values())) + + def mean_results(self): + return self.metric_box.mean_results() + self.metric_mask.mean_results() + + def class_result(self, i): + return self.metric_box.class_result(i) + self.metric_mask.class_result(i) + + def get_maps(self, nc): + return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc) + + @property + def ap_class_index(self): + # boxes and masks have the same ap_class_index + return self.metric_box.ap_class_index + + +KEYS = [ + "train/box_loss", + "train/seg_loss", # train loss + "train/obj_loss", + "train/cls_loss", + "metrics/precision(B)", + "metrics/recall(B)", + "metrics/mAP_0.5(B)", + "metrics/mAP_0.5:0.95(B)", # metrics + "metrics/precision(M)", + "metrics/recall(M)", + "metrics/mAP_0.5(M)", + "metrics/mAP_0.5:0.95(M)", # metrics + "val/box_loss", + "val/seg_loss", # val loss + "val/obj_loss", + "val/cls_loss", + "x/lr0", + "x/lr1", + "x/lr2",] + +BEST_KEYS = [ + "best/epoch", + "best/precision(B)", + "best/recall(B)", + "best/mAP_0.5(B)", + "best/mAP_0.5:0.95(B)", + "best/precision(M)", + "best/recall(M)", + "best/mAP_0.5(M)", + "best/mAP_0.5:0.95(M)",] diff --git a/utils/segment/plots.py b/utils/segment/plots.py new file mode 100644 index 0000000000000000000000000000000000000000..9b90900b3772fe23dbd57deb64221f98e563b069 --- /dev/null +++ b/utils/segment/plots.py @@ -0,0 +1,143 @@ +import contextlib +import math +from pathlib import Path + +import cv2 +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import torch + +from .. import threaded +from ..general import xywh2xyxy +from ..plots import Annotator, colors + + +@threaded +def plot_images_and_masks(images, targets, masks, paths=None, fname='images.jpg', names=None): + # Plot image grid with labels + if isinstance(images, torch.Tensor): + images = images.cpu().float().numpy() + if isinstance(targets, torch.Tensor): + targets = targets.cpu().numpy() + if isinstance(masks, torch.Tensor): + masks = masks.cpu().numpy().astype(int) + + max_size = 1920 # max image size + max_subplots = 16 # max image subplots, i.e. 4x4 + bs, _, h, w = images.shape # batch size, _, height, width + bs = min(bs, max_subplots) # limit plot images + ns = np.ceil(bs ** 0.5) # number of subplots (square) + if np.max(images[0]) <= 1: + images *= 255 # de-normalise (optional) + + # Build Image + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + for i, im in enumerate(images): + if i == max_subplots: # if last batch has fewer images than we expect + break + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + im = im.transpose(1, 2, 0) + mosaic[y:y + h, x:x + w, :] = im + + # Resize (optional) + scale = max_size / ns / max(h, w) + if scale < 1: + h = math.ceil(scale * h) + w = math.ceil(scale * w) + mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h))) + + # Annotate + fs = int((h + w) * ns * 0.01) # font size + annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names) + for i in range(i + 1): + x, y = int(w * (i // ns)), int(h * (i % ns)) # block origin + annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2) # borders + if paths: + annotator.text((x + 5, y + 5 + h), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220)) # filenames + if len(targets) > 0: + idx = targets[:, 0] == i + ti = targets[idx] # image targets + + boxes = xywh2xyxy(ti[:, 2:6]).T + classes = ti[:, 1].astype('int') + labels = ti.shape[1] == 6 # labels if no conf column + conf = None if labels else ti[:, 6] # check for confidence presence (label vs pred) + + if boxes.shape[1]: + if boxes.max() <= 1.01: # if normalized with tolerance 0.01 + boxes[[0, 2]] *= w # scale to pixels + boxes[[1, 3]] *= h + elif scale < 1: # absolute coords need scale if image scales + boxes *= scale + boxes[[0, 2]] += x + boxes[[1, 3]] += y + for j, box in enumerate(boxes.T.tolist()): + cls = classes[j] + color = colors(cls) + cls = names[cls] if names else cls + if labels or conf[j] > 0.25: # 0.25 conf thresh + label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}' + annotator.box_label(box, label, color=color) + + # Plot masks + if len(masks): + if masks.max() > 1.0: # mean that masks are overlap + image_masks = masks[[i]] # (1, 640, 640) + nl = len(ti) + index = np.arange(nl).reshape(nl, 1, 1) + 1 + image_masks = np.repeat(image_masks, nl, axis=0) + image_masks = np.where(image_masks == index, 1.0, 0.0) + else: + image_masks = masks[idx] + + im = np.asarray(annotator.im).copy() + for j, box in enumerate(boxes.T.tolist()): + if labels or conf[j] > 0.25: # 0.25 conf thresh + color = colors(classes[j]) + mh, mw = image_masks[j].shape + if mh != h or mw != w: + mask = image_masks[j].astype(np.uint8) + mask = cv2.resize(mask, (w, h)) + mask = mask.astype(bool) + else: + mask = image_masks[j].astype(bool) + with contextlib.suppress(Exception): + im[y:y + h, x:x + w, :][mask] = im[y:y + h, x:x + w, :][mask] * 0.4 + np.array(color) * 0.6 + annotator.fromarray(im) + annotator.im.save(fname) # save + + +def plot_results_with_masks(file="path/to/results.csv", dir="", best=True): + # Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv') + save_dir = Path(file).parent if file else Path(dir) + fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True) + ax = ax.ravel() + files = list(save_dir.glob("results*.csv")) + assert len(files), f"No results.csv files found in {save_dir.resolve()}, nothing to plot." + for f in files: + try: + data = pd.read_csv(f) + index = np.argmax(0.9 * data.values[:, 8] + 0.1 * data.values[:, 7] + 0.9 * data.values[:, 12] + + 0.1 * data.values[:, 11]) + s = [x.strip() for x in data.columns] + x = data.values[:, 0] + for i, j in enumerate([1, 2, 3, 4, 5, 6, 9, 10, 13, 14, 15, 16, 7, 8, 11, 12]): + y = data.values[:, j] + # y[y == 0] = np.nan # don't show zero values + ax[i].plot(x, y, marker=".", label=f.stem, linewidth=2, markersize=2) + if best: + # best + ax[i].scatter(index, y[index], color="r", label=f"best:{index}", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[index], 5)}") + else: + # last + ax[i].scatter(x[-1], y[-1], color="r", label="last", marker="*", linewidth=3) + ax[i].set_title(s[j] + f"\n{round(y[-1], 5)}") + # if j in [8, 9, 10]: # share train and val loss y axes + # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) + except Exception as e: + print(f"Warning: Plotting error for {f}: {e}") + ax[1].legend() + fig.savefig(save_dir / "results.png", dpi=200) + plt.close() diff --git a/utils/segment/tal/__init__.py b/utils/segment/tal/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..84952a8167bc2975913a6def6b4f027d566552a9 --- /dev/null +++ b/utils/segment/tal/__init__.py @@ -0,0 +1 @@ +# init \ No newline at end of file diff --git a/utils/segment/tal/anchor_generator.py b/utils/segment/tal/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..0de163651e21225445097f90e05a6c6d8ff10092 --- /dev/null +++ b/utils/segment/tal/anchor_generator.py @@ -0,0 +1,38 @@ +import torch + +from utils.general import check_version + +TORCH_1_10 = check_version(torch.__version__, '1.10.0') + + +def make_anchors(feats, strides, grid_cell_offset=0.5): + """Generate anchors from features.""" + anchor_points, stride_tensor = [], [] + assert feats is not None + dtype, device = feats[0].dtype, feats[0].device + for i, stride in enumerate(strides): + _, _, h, w = feats[i].shape + sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x + sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y + sy, sx = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx) + anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) + stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device)) + return torch.cat(anchor_points), torch.cat(stride_tensor) + + +def dist2bbox(distance, anchor_points, xywh=True, dim=-1): + """Transform distance(ltrb) to box(xywh or xyxy).""" + lt, rb = torch.split(distance, 2, dim) + x1y1 = anchor_points - lt + x2y2 = anchor_points + rb + if xywh: + c_xy = (x1y1 + x2y2) / 2 + wh = x2y2 - x1y1 + return torch.cat((c_xy, wh), dim) # xywh bbox + return torch.cat((x1y1, x2y2), dim) # xyxy bbox + + +def bbox2dist(anchor_points, bbox, reg_max): + """Transform bbox(xyxy) to dist(ltrb).""" + x1y1, x2y2 = torch.split(bbox, 2, -1) + return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp(0, reg_max - 0.01) # dist (lt, rb) diff --git a/utils/segment/tal/assigner.py b/utils/segment/tal/assigner.py new file mode 100644 index 0000000000000000000000000000000000000000..598b3575f83a0ec45449910bc2c5fde18dbaa054 --- /dev/null +++ b/utils/segment/tal/assigner.py @@ -0,0 +1,180 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from utils.metrics import bbox_iou + + +def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9): + """select the positive anchor center in gt + + Args: + xy_centers (Tensor): shape(h*w, 4) + gt_bboxes (Tensor): shape(b, n_boxes, 4) + Return: + (Tensor): shape(b, n_boxes, h*w) + """ + n_anchors = xy_centers.shape[0] + bs, n_boxes, _ = gt_bboxes.shape + lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2) # left-top, right-bottom + bbox_deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(bs, n_boxes, n_anchors, -1) + # return (bbox_deltas.min(3)[0] > eps).to(gt_bboxes.dtype) + return bbox_deltas.amin(3).gt_(eps) + + +def select_highest_overlaps(mask_pos, overlaps, n_max_boxes): + """if an anchor box is assigned to multiple gts, + the one with the highest iou will be selected. + + Args: + mask_pos (Tensor): shape(b, n_max_boxes, h*w) + overlaps (Tensor): shape(b, n_max_boxes, h*w) + Return: + target_gt_idx (Tensor): shape(b, h*w) + fg_mask (Tensor): shape(b, h*w) + mask_pos (Tensor): shape(b, n_max_boxes, h*w) + """ + # (b, n_max_boxes, h*w) -> (b, h*w) + fg_mask = mask_pos.sum(-2) + if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes + mask_multi_gts = (fg_mask.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) # (b, n_max_boxes, h*w) + max_overlaps_idx = overlaps.argmax(1) # (b, h*w) + is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) # (b, h*w, n_max_boxes) + is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) # (b, n_max_boxes, h*w) + mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos) # (b, n_max_boxes, h*w) + fg_mask = mask_pos.sum(-2) + # find each grid serve which gt(index) + target_gt_idx = mask_pos.argmax(-2) # (b, h*w) + return target_gt_idx, fg_mask, mask_pos + + +class TaskAlignedAssigner(nn.Module): + def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9): + super().__init__() + self.topk = topk + self.num_classes = num_classes + self.bg_idx = num_classes + self.alpha = alpha + self.beta = beta + self.eps = eps + + @torch.no_grad() + def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt): + """This code referenced to + https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py + + Args: + pd_scores (Tensor): shape(bs, num_total_anchors, num_classes) + pd_bboxes (Tensor): shape(bs, num_total_anchors, 4) + anc_points (Tensor): shape(num_total_anchors, 2) + gt_labels (Tensor): shape(bs, n_max_boxes, 1) + gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) + mask_gt (Tensor): shape(bs, n_max_boxes, 1) + Returns: + target_labels (Tensor): shape(bs, num_total_anchors) + target_bboxes (Tensor): shape(bs, num_total_anchors, 4) + target_scores (Tensor): shape(bs, num_total_anchors, num_classes) + fg_mask (Tensor): shape(bs, num_total_anchors) + """ + self.bs = pd_scores.size(0) + self.n_max_boxes = gt_bboxes.size(1) + + if self.n_max_boxes == 0: + device = gt_bboxes.device + return (torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), + torch.zeros_like(pd_bboxes).to(device), + torch.zeros_like(pd_scores).to(device), + torch.zeros_like(pd_scores[..., 0]).to(device), + torch.zeros_like(pd_scores[..., 0]).to(device)) + + mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, + mask_gt) + + target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes) + + # assigned target + target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask) + + # normalize + align_metric *= mask_pos + pos_align_metrics = align_metric.amax(axis=-1, keepdim=True) # b, max_num_obj + pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True) # b, max_num_obj + norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1) + target_scores = target_scores * norm_align_metric + + return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_gt_idx + + def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt): + + # get anchor_align metric, (b, max_num_obj, h*w) + align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes) + # get in_gts mask, (b, max_num_obj, h*w) + mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes) + # get topk_metric mask, (b, max_num_obj, h*w) + mask_topk = self.select_topk_candidates(align_metric * mask_in_gts, + topk_mask=mask_gt.repeat([1, 1, self.topk]).bool()) + # merge all mask to a final mask, (b, max_num_obj, h*w) + mask_pos = mask_topk * mask_in_gts * mask_gt + + return mask_pos, align_metric, overlaps + + def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes): + + gt_labels = gt_labels.to(torch.long) # b, max_num_obj, 1 + ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) # 2, b, max_num_obj + ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) # b, max_num_obj + ind[1] = gt_labels.squeeze(-1) # b, max_num_obj + # get the scores of each grid for each gt cls + bbox_scores = pd_scores[ind[0], :, ind[1]] # b, max_num_obj, h*w + + overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False, CIoU=True).squeeze(3).clamp(0) + align_metric = bbox_scores.pow(self.alpha) * (overlaps).pow(self.beta) + return align_metric, overlaps + + def select_topk_candidates(self, metrics, largest=True, topk_mask=None): + """ + Args: + metrics: (b, max_num_obj, h*w). + topk_mask: (b, max_num_obj, topk) or None + """ + + num_anchors = metrics.shape[-1] # h*w + # (b, max_num_obj, topk) + topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest) + if topk_mask is None: + topk_mask = (topk_metrics.max(-1, keepdim=True) > self.eps).tile([1, 1, self.topk]) + # (b, max_num_obj, topk) + topk_idxs = torch.where(topk_mask, topk_idxs, 0) + # (b, max_num_obj, topk, h*w) -> (b, max_num_obj, h*w) + is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(-2) + # filter invalid bboxes + # assigned topk should be unique, this is for dealing with empty labels + # since empty labels will generate index `0` through `F.one_hot` + # NOTE: but what if the topk_idxs include `0`? + is_in_topk = torch.where(is_in_topk > 1, 0, is_in_topk) + return is_in_topk.to(metrics.dtype) + + def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask): + """ + Args: + gt_labels: (b, max_num_obj, 1) + gt_bboxes: (b, max_num_obj, 4) + target_gt_idx: (b, h*w) + fg_mask: (b, h*w) + """ + + # assigned target labels, (b, 1) + batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None] + target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes # (b, h*w) + target_labels = gt_labels.long().flatten()[target_gt_idx] # (b, h*w) + + # assigned target boxes, (b, max_num_obj, 4) -> (b, h*w) + target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx] + + # assigned target scores + target_labels.clamp(0) + target_scores = F.one_hot(target_labels, self.num_classes) # (b, h*w, 80) + fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes) # (b, h*w, 80) + target_scores = torch.where(fg_scores_mask > 0, target_scores, 0) + + return target_labels, target_bboxes, target_scores diff --git a/utils/tal/__init__.py b/utils/tal/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..84952a8167bc2975913a6def6b4f027d566552a9 --- /dev/null +++ b/utils/tal/__init__.py @@ -0,0 +1 @@ +# init \ No newline at end of file diff --git a/utils/tal/anchor_generator.py b/utils/tal/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..0de163651e21225445097f90e05a6c6d8ff10092 --- /dev/null +++ b/utils/tal/anchor_generator.py @@ -0,0 +1,38 @@ +import torch + +from utils.general import check_version + +TORCH_1_10 = check_version(torch.__version__, '1.10.0') + + +def make_anchors(feats, strides, grid_cell_offset=0.5): + """Generate anchors from features.""" + anchor_points, stride_tensor = [], [] + assert feats is not None + dtype, device = feats[0].dtype, feats[0].device + for i, stride in enumerate(strides): + _, _, h, w = feats[i].shape + sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset # shift x + sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset # shift y + sy, sx = torch.meshgrid(sy, sx, indexing='ij') if TORCH_1_10 else torch.meshgrid(sy, sx) + anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2)) + stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device)) + return torch.cat(anchor_points), torch.cat(stride_tensor) + + +def dist2bbox(distance, anchor_points, xywh=True, dim=-1): + """Transform distance(ltrb) to box(xywh or xyxy).""" + lt, rb = torch.split(distance, 2, dim) + x1y1 = anchor_points - lt + x2y2 = anchor_points + rb + if xywh: + c_xy = (x1y1 + x2y2) / 2 + wh = x2y2 - x1y1 + return torch.cat((c_xy, wh), dim) # xywh bbox + return torch.cat((x1y1, x2y2), dim) # xyxy bbox + + +def bbox2dist(anchor_points, bbox, reg_max): + """Transform bbox(xyxy) to dist(ltrb).""" + x1y1, x2y2 = torch.split(bbox, 2, -1) + return torch.cat((anchor_points - x1y1, x2y2 - anchor_points), -1).clamp(0, reg_max - 0.01) # dist (lt, rb) diff --git a/utils/tal/assigner.py b/utils/tal/assigner.py new file mode 100644 index 0000000000000000000000000000000000000000..ac4bdb8c83837d3bee11ae7996e34f1e87ba3b65 --- /dev/null +++ b/utils/tal/assigner.py @@ -0,0 +1,179 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from utils.metrics import bbox_iou + + +def select_candidates_in_gts(xy_centers, gt_bboxes, eps=1e-9): + """select the positive anchor center in gt + + Args: + xy_centers (Tensor): shape(h*w, 4) + gt_bboxes (Tensor): shape(b, n_boxes, 4) + Return: + (Tensor): shape(b, n_boxes, h*w) + """ + n_anchors = xy_centers.shape[0] + bs, n_boxes, _ = gt_bboxes.shape + lt, rb = gt_bboxes.view(-1, 1, 4).chunk(2, 2) # left-top, right-bottom + bbox_deltas = torch.cat((xy_centers[None] - lt, rb - xy_centers[None]), dim=2).view(bs, n_boxes, n_anchors, -1) + # return (bbox_deltas.min(3)[0] > eps).to(gt_bboxes.dtype) + return bbox_deltas.amin(3).gt_(eps) + + +def select_highest_overlaps(mask_pos, overlaps, n_max_boxes): + """if an anchor box is assigned to multiple gts, + the one with the highest iou will be selected. + + Args: + mask_pos (Tensor): shape(b, n_max_boxes, h*w) + overlaps (Tensor): shape(b, n_max_boxes, h*w) + Return: + target_gt_idx (Tensor): shape(b, h*w) + fg_mask (Tensor): shape(b, h*w) + mask_pos (Tensor): shape(b, n_max_boxes, h*w) + """ + # (b, n_max_boxes, h*w) -> (b, h*w) + fg_mask = mask_pos.sum(-2) + if fg_mask.max() > 1: # one anchor is assigned to multiple gt_bboxes + mask_multi_gts = (fg_mask.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) # (b, n_max_boxes, h*w) + max_overlaps_idx = overlaps.argmax(1) # (b, h*w) + is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) # (b, h*w, n_max_boxes) + is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) # (b, n_max_boxes, h*w) + mask_pos = torch.where(mask_multi_gts, is_max_overlaps, mask_pos) # (b, n_max_boxes, h*w) + fg_mask = mask_pos.sum(-2) + # find each grid serve which gt(index) + target_gt_idx = mask_pos.argmax(-2) # (b, h*w) + return target_gt_idx, fg_mask, mask_pos + + +class TaskAlignedAssigner(nn.Module): + def __init__(self, topk=13, num_classes=80, alpha=1.0, beta=6.0, eps=1e-9): + super().__init__() + self.topk = topk + self.num_classes = num_classes + self.bg_idx = num_classes + self.alpha = alpha + self.beta = beta + self.eps = eps + + @torch.no_grad() + def forward(self, pd_scores, pd_bboxes, anc_points, gt_labels, gt_bboxes, mask_gt): + """This code referenced to + https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py + + Args: + pd_scores (Tensor): shape(bs, num_total_anchors, num_classes) + pd_bboxes (Tensor): shape(bs, num_total_anchors, 4) + anc_points (Tensor): shape(num_total_anchors, 2) + gt_labels (Tensor): shape(bs, n_max_boxes, 1) + gt_bboxes (Tensor): shape(bs, n_max_boxes, 4) + mask_gt (Tensor): shape(bs, n_max_boxes, 1) + Returns: + target_labels (Tensor): shape(bs, num_total_anchors) + target_bboxes (Tensor): shape(bs, num_total_anchors, 4) + target_scores (Tensor): shape(bs, num_total_anchors, num_classes) + fg_mask (Tensor): shape(bs, num_total_anchors) + """ + self.bs = pd_scores.size(0) + self.n_max_boxes = gt_bboxes.size(1) + + if self.n_max_boxes == 0: + device = gt_bboxes.device + return (torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), + torch.zeros_like(pd_bboxes).to(device), + torch.zeros_like(pd_scores).to(device), + torch.zeros_like(pd_scores[..., 0]).to(device)) + + mask_pos, align_metric, overlaps = self.get_pos_mask(pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, + mask_gt) + + target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(mask_pos, overlaps, self.n_max_boxes) + + # assigned target + target_labels, target_bboxes, target_scores = self.get_targets(gt_labels, gt_bboxes, target_gt_idx, fg_mask) + + # normalize + align_metric *= mask_pos + pos_align_metrics = align_metric.amax(axis=-1, keepdim=True) # b, max_num_obj + pos_overlaps = (overlaps * mask_pos).amax(axis=-1, keepdim=True) # b, max_num_obj + norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).amax(-2).unsqueeze(-1) + target_scores = target_scores * norm_align_metric + + return target_labels, target_bboxes, target_scores, fg_mask.bool() + + def get_pos_mask(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes, anc_points, mask_gt): + + # get anchor_align metric, (b, max_num_obj, h*w) + align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes) + # get in_gts mask, (b, max_num_obj, h*w) + mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes) + # get topk_metric mask, (b, max_num_obj, h*w) + mask_topk = self.select_topk_candidates(align_metric * mask_in_gts, + topk_mask=mask_gt.repeat([1, 1, self.topk]).bool()) + # merge all mask to a final mask, (b, max_num_obj, h*w) + mask_pos = mask_topk * mask_in_gts * mask_gt + + return mask_pos, align_metric, overlaps + + def get_box_metrics(self, pd_scores, pd_bboxes, gt_labels, gt_bboxes): + + gt_labels = gt_labels.to(torch.long) # b, max_num_obj, 1 + ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) # 2, b, max_num_obj + ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) # b, max_num_obj + ind[1] = gt_labels.squeeze(-1) # b, max_num_obj + # get the scores of each grid for each gt cls + bbox_scores = pd_scores[ind[0], :, ind[1]] # b, max_num_obj, h*w + + overlaps = bbox_iou(gt_bboxes.unsqueeze(2), pd_bboxes.unsqueeze(1), xywh=False, CIoU=True).squeeze(3).clamp(0) + align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta) + return align_metric, overlaps + + def select_topk_candidates(self, metrics, largest=True, topk_mask=None): + """ + Args: + metrics: (b, max_num_obj, h*w). + topk_mask: (b, max_num_obj, topk) or None + """ + + num_anchors = metrics.shape[-1] # h*w + # (b, max_num_obj, topk) + topk_metrics, topk_idxs = torch.topk(metrics, self.topk, dim=-1, largest=largest) + if topk_mask is None: + topk_mask = (topk_metrics.max(-1, keepdim=True) > self.eps).tile([1, 1, self.topk]) + # (b, max_num_obj, topk) + topk_idxs = torch.where(topk_mask, topk_idxs, 0) + # (b, max_num_obj, topk, h*w) -> (b, max_num_obj, h*w) + is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(-2) + # filter invalid bboxes + # assigned topk should be unique, this is for dealing with empty labels + # since empty labels will generate index `0` through `F.one_hot` + # NOTE: but what if the topk_idxs include `0`? + is_in_topk = torch.where(is_in_topk > 1, 0, is_in_topk) + return is_in_topk.to(metrics.dtype) + + def get_targets(self, gt_labels, gt_bboxes, target_gt_idx, fg_mask): + """ + Args: + gt_labels: (b, max_num_obj, 1) + gt_bboxes: (b, max_num_obj, 4) + target_gt_idx: (b, h*w) + fg_mask: (b, h*w) + """ + + # assigned target labels, (b, 1) + batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[..., None] + target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes # (b, h*w) + target_labels = gt_labels.long().flatten()[target_gt_idx] # (b, h*w) + + # assigned target boxes, (b, max_num_obj, 4) -> (b, h*w) + target_bboxes = gt_bboxes.view(-1, 4)[target_gt_idx] + + # assigned target scores + target_labels.clamp(0) + target_scores = F.one_hot(target_labels, self.num_classes) # (b, h*w, 80) + fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes) # (b, h*w, 80) + target_scores = torch.where(fg_scores_mask > 0, target_scores, 0) + + return target_labels, target_bboxes, target_scores diff --git a/utils/torch_utils.py b/utils/torch_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..076ec6b15793b07bf33b4163254be4d953196215 --- /dev/null +++ b/utils/torch_utils.py @@ -0,0 +1,529 @@ +import math +import os +import platform +import subprocess +import time +import warnings +from contextlib import contextmanager +from copy import deepcopy +from pathlib import Path + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.parallel import DistributedDataParallel as DDP + +from utils.general import LOGGER, check_version, colorstr, file_date, git_describe +from utils.lion import Lion + +LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html +RANK = int(os.getenv('RANK', -1)) +WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1)) + +try: + import thop # for FLOPs computation +except ImportError: + thop = None + +# Suppress PyTorch warnings +warnings.filterwarnings('ignore', message='User provided device_type of \'cuda\', but CUDA is not available. Disabling') +warnings.filterwarnings('ignore', category=UserWarning) + + +def smart_inference_mode(torch_1_9=check_version(torch.__version__, '1.9.0')): + # Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator + def decorate(fn): + return (torch.inference_mode if torch_1_9 else torch.no_grad)()(fn) + + return decorate + + +def smartCrossEntropyLoss(label_smoothing=0.0): + # Returns nn.CrossEntropyLoss with label smoothing enabled for torch>=1.10.0 + if check_version(torch.__version__, '1.10.0'): + return nn.CrossEntropyLoss(label_smoothing=label_smoothing) + if label_smoothing > 0: + LOGGER.warning(f'WARNING ⚠️ label smoothing {label_smoothing} requires torch>=1.10.0') + return nn.CrossEntropyLoss() + + +def smart_DDP(model): + # Model DDP creation with checks + assert not check_version(torch.__version__, '1.12.0', pinned=True), \ + 'torch==1.12.0 torchvision==0.13.0 DDP training is not supported due to a known issue. ' \ + 'Please upgrade or downgrade torch to use DDP. See https://github.com/ultralytics/yolov5/issues/8395' + if check_version(torch.__version__, '1.11.0'): + return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True) + else: + return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK) + + +def reshape_classifier_output(model, n=1000): + # Update a TorchVision classification model to class count 'n' if required + from models.common import Classify + name, m = list((model.model if hasattr(model, 'model') else model).named_children())[-1] # last module + if isinstance(m, Classify): # YOLOv5 Classify() head + if m.linear.out_features != n: + m.linear = nn.Linear(m.linear.in_features, n) + elif isinstance(m, nn.Linear): # ResNet, EfficientNet + if m.out_features != n: + setattr(model, name, nn.Linear(m.in_features, n)) + elif isinstance(m, nn.Sequential): + types = [type(x) for x in m] + if nn.Linear in types: + i = types.index(nn.Linear) # nn.Linear index + if m[i].out_features != n: + m[i] = nn.Linear(m[i].in_features, n) + elif nn.Conv2d in types: + i = types.index(nn.Conv2d) # nn.Conv2d index + if m[i].out_channels != n: + m[i] = nn.Conv2d(m[i].in_channels, n, m[i].kernel_size, m[i].stride, bias=m[i].bias is not None) + + +@contextmanager +def torch_distributed_zero_first(local_rank: int): + # Decorator to make all processes in distributed training wait for each local_master to do something + if local_rank not in [-1, 0]: + dist.barrier(device_ids=[local_rank]) + yield + if local_rank == 0: + dist.barrier(device_ids=[0]) + + +def device_count(): + # Returns number of CUDA devices available. Safe version of torch.cuda.device_count(). Supports Linux and Windows + assert platform.system() in ('Linux', 'Windows'), 'device_count() only supported on Linux or Windows' + try: + cmd = 'nvidia-smi -L | wc -l' if platform.system() == 'Linux' else 'nvidia-smi -L | find /c /v ""' # Windows + return int(subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]) + except Exception: + return 0 + + +def select_device(device='', batch_size=0, newline=True): + # device = None or 'cpu' or 0 or '0' or '0,1,2,3' + s = f'YOLOv5 🚀 {git_describe() or file_date()} Python-{platform.python_version()} torch-{torch.__version__} ' + device = str(device).strip().lower().replace('cuda:', '').replace('none', '') # to string, 'cuda:0' to '0' + cpu = device == 'cpu' + mps = device == 'mps' # Apple Metal Performance Shaders (MPS) + if cpu or mps: + os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False + elif device: # non-cpu device requested + os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable - must be before assert is_available() + assert torch.cuda.is_available() and torch.cuda.device_count() >= len(device.replace(',', '')), \ + f"Invalid CUDA '--device {device}' requested, use '--device cpu' or pass valid CUDA device(s)" + + if not cpu and not mps and torch.cuda.is_available(): # prefer GPU if available + devices = device.split(',') if device else '0' # range(torch.cuda.device_count()) # i.e. 0,1,6,7 + n = len(devices) # device count + if n > 1 and batch_size > 0: # check batch_size is divisible by device_count + assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' + space = ' ' * (len(s) + 1) + for i, d in enumerate(devices): + p = torch.cuda.get_device_properties(i) + s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB + arg = 'cuda:0' + elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available(): # prefer MPS if available + s += 'MPS\n' + arg = 'mps' + else: # revert to CPU + s += 'CPU\n' + arg = 'cpu' + + if not newline: + s = s.rstrip() + LOGGER.info(s) + return torch.device(arg) + + +def time_sync(): + # PyTorch-accurate time + if torch.cuda.is_available(): + torch.cuda.synchronize() + return time.time() + + +def profile(input, ops, n=10, device=None): + """ YOLOv5 speed/memory/FLOPs profiler + Usage: + input = torch.randn(16, 3, 640, 640) + m1 = lambda x: x * torch.sigmoid(x) + m2 = nn.SiLU() + profile(input, [m1, m2], n=100) # profile over 100 iterations + """ + results = [] + if not isinstance(device, torch.device): + device = select_device(device) + print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}" + f"{'input':>24s}{'output':>24s}") + + for x in input if isinstance(input, list) else [input]: + x = x.to(device) + x.requires_grad = True + for m in ops if isinstance(ops, list) else [ops]: + m = m.to(device) if hasattr(m, 'to') else m # device + m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m + tf, tb, t = 0, 0, [0, 0, 0] # dt forward, backward + try: + flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPs + except Exception: + flops = 0 + + try: + for _ in range(n): + t[0] = time_sync() + y = m(x) + t[1] = time_sync() + try: + _ = (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward() + t[2] = time_sync() + except Exception: # no backward method + # print(e) # for debug + t[2] = float('nan') + tf += (t[1] - t[0]) * 1000 / n # ms per op forward + tb += (t[2] - t[1]) * 1000 / n # ms per op backward + mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0 # (GB) + s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' for x in (x, y)) # shapes + p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters + print(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}') + results.append([p, flops, mem, tf, tb, s_in, s_out]) + except Exception as e: + print(e) + results.append(None) + torch.cuda.empty_cache() + return results + + +def is_parallel(model): + # Returns True if model is of type DP or DDP + return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) + + +def de_parallel(model): + # De-parallelize a model: returns single-GPU model if model is of type DP or DDP + return model.module if is_parallel(model) else model + + +def initialize_weights(model): + for m in model.modules(): + t = type(m) + if t is nn.Conv2d: + pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif t is nn.BatchNorm2d: + m.eps = 1e-3 + m.momentum = 0.03 + elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + m.inplace = True + + +def find_modules(model, mclass=nn.Conv2d): + # Finds layer indices matching module class 'mclass' + return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] + + +def sparsity(model): + # Return global model sparsity + a, b = 0, 0 + for p in model.parameters(): + a += p.numel() + b += (p == 0).sum() + return b / a + + +def prune(model, amount=0.3): + # Prune model to requested global sparsity + import torch.nn.utils.prune as prune + for name, m in model.named_modules(): + if isinstance(m, nn.Conv2d): + prune.l1_unstructured(m, name='weight', amount=amount) # prune + prune.remove(m, 'weight') # make permanent + LOGGER.info(f'Model pruned to {sparsity(model):.3g} global sparsity') + + +def fuse_conv_and_bn(conv, bn): + # Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + fusedconv = nn.Conv2d(conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + dilation=conv.dilation, + groups=conv.groups, + bias=True).requires_grad_(False).to(conv.weight.device) + + # Prepare filters + w_conv = conv.weight.clone().view(conv.out_channels, -1) + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) + + # Prepare spatial bias + b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) + fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + + return fusedconv + + +def model_info(model, verbose=False, imgsz=640): + # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320] + n_p = sum(x.numel() for x in model.parameters()) # number parameters + n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients + if verbose: + print(f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}") + for i, (name, p) in enumerate(model.named_parameters()): + name = name.replace('module_list.', '') + print('%5g %40s %9s %12g %20s %10.3g %10.3g' % + (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) + + try: # FLOPs + p = next(model.parameters()) + stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32 # max stride + im = torch.empty((1, p.shape[1], stride, stride), device=p.device) # input image in BCHW format + flops = thop.profile(deepcopy(model), inputs=(im,), verbose=False)[0] / 1E9 * 2 # stride GFLOPs + imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz] # expand if int/float + fs = f', {flops * imgsz[0] / stride * imgsz[1] / stride:.1f} GFLOPs' # 640x640 GFLOPs + except Exception: + fs = '' + + name = Path(model.yaml_file).stem.replace('yolov5', 'YOLOv5') if hasattr(model, 'yaml_file') else 'Model' + LOGGER.info(f"{name} summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") + + +def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) + # Scales img(bs,3,y,x) by ratio constrained to gs-multiple + if ratio == 1.0: + return img + h, w = img.shape[2:] + s = (int(h * ratio), int(w * ratio)) # new size + img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize + if not same_shape: # pad/crop img + h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w)) + return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean + + +def copy_attr(a, b, include=(), exclude=()): + # Copy attributes from b to a, options to only include [...] and to exclude [...] + for k, v in b.__dict__.items(): + if (len(include) and k not in include) or k.startswith('_') or k in exclude: + continue + else: + setattr(a, k, v) + + +def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, decay=1e-5): + # YOLOv5 3-param group optimizer: 0) weights with decay, 1) weights no decay, 2) biases no decay + g = [], [], [] # optimizer parameter groups + bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. BatchNorm2d() + #for v in model.modules(): + # for p_name, p in v.named_parameters(recurse=0): + # if p_name == 'bias': # bias (no decay) + # g[2].append(p) + # elif p_name == 'weight' and isinstance(v, bn): # weight (no decay) + # g[1].append(p) + # else: + # g[0].append(p) # weight (with decay) + + for v in model.modules(): + if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias (no decay) + g[2].append(v.bias) + if isinstance(v, bn): # weight (no decay) + g[1].append(v.weight) + elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay) + g[0].append(v.weight) + + if hasattr(v, 'im'): + if hasattr(v.im, 'implicit'): + g[1].append(v.im.implicit) + else: + for iv in v.im: + g[1].append(iv.implicit) + if hasattr(v, 'ia'): + if hasattr(v.ia, 'implicit'): + g[1].append(v.ia.implicit) + else: + for iv in v.ia: + g[1].append(iv.implicit) + + if hasattr(v, 'im2'): + if hasattr(v.im2, 'implicit'): + g[1].append(v.im2.implicit) + else: + for iv in v.im2: + g[1].append(iv.implicit) + if hasattr(v, 'ia2'): + if hasattr(v.ia2, 'implicit'): + g[1].append(v.ia2.implicit) + else: + for iv in v.ia2: + g[1].append(iv.implicit) + + if hasattr(v, 'im3'): + if hasattr(v.im3, 'implicit'): + g[1].append(v.im3.implicit) + else: + for iv in v.im3: + g[1].append(iv.implicit) + if hasattr(v, 'ia3'): + if hasattr(v.ia3, 'implicit'): + g[1].append(v.ia3.implicit) + else: + for iv in v.ia3: + g[1].append(iv.implicit) + + if hasattr(v, 'im4'): + if hasattr(v.im4, 'implicit'): + g[1].append(v.im4.implicit) + else: + for iv in v.im4: + g[1].append(iv.implicit) + if hasattr(v, 'ia4'): + if hasattr(v.ia4, 'implicit'): + g[1].append(v.ia4.implicit) + else: + for iv in v.ia4: + g[1].append(iv.implicit) + + if hasattr(v, 'im5'): + if hasattr(v.im5, 'implicit'): + g[1].append(v.im5.implicit) + else: + for iv in v.im5: + g[1].append(iv.implicit) + if hasattr(v, 'ia5'): + if hasattr(v.ia5, 'implicit'): + g[1].append(v.ia5.implicit) + else: + for iv in v.ia5: + g[1].append(iv.implicit) + + if hasattr(v, 'im6'): + if hasattr(v.im6, 'implicit'): + g[1].append(v.im6.implicit) + else: + for iv in v.im6: + g[1].append(iv.implicit) + if hasattr(v, 'ia6'): + if hasattr(v.ia6, 'implicit'): + g[1].append(v.ia6.implicit) + else: + for iv in v.ia6: + g[1].append(iv.implicit) + + if hasattr(v, 'im7'): + if hasattr(v.im7, 'implicit'): + g[1].append(v.im7.implicit) + else: + for iv in v.im7: + g[1].append(iv.implicit) + if hasattr(v, 'ia7'): + if hasattr(v.ia7, 'implicit'): + g[1].append(v.ia7.implicit) + else: + for iv in v.ia7: + g[1].append(iv.implicit) + + if name == 'Adam': + optimizer = torch.optim.Adam(g[2], lr=lr, betas=(momentum, 0.999)) # adjust beta1 to momentum + elif name == 'AdamW': + optimizer = torch.optim.AdamW(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0, amsgrad=True) + elif name == 'RMSProp': + optimizer = torch.optim.RMSprop(g[2], lr=lr, momentum=momentum) + elif name == 'SGD': + optimizer = torch.optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True) + elif name == 'LION': + optimizer = Lion(g[2], lr=lr, betas=(momentum, 0.99), weight_decay=0.0) + else: + raise NotImplementedError(f'Optimizer {name} not implemented.') + + optimizer.add_param_group({'params': g[0], 'weight_decay': decay}) # add g0 with weight_decay + optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0}) # add g1 (BatchNorm2d weights) + LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}) with parameter groups " + f"{len(g[1])} weight(decay=0.0), {len(g[0])} weight(decay={decay}), {len(g[2])} bias") + return optimizer + + +def smart_hub_load(repo='ultralytics/yolov5', model='yolov5s', **kwargs): + # YOLOv5 torch.hub.load() wrapper with smart error/issue handling + if check_version(torch.__version__, '1.9.1'): + kwargs['skip_validation'] = True # validation causes GitHub API rate limit errors + if check_version(torch.__version__, '1.12.0'): + kwargs['trust_repo'] = True # argument required starting in torch 0.12 + try: + return torch.hub.load(repo, model, **kwargs) + except Exception: + return torch.hub.load(repo, model, force_reload=True, **kwargs) + + +def smart_resume(ckpt, optimizer, ema=None, weights='yolov5s.pt', epochs=300, resume=True): + # Resume training from a partially trained checkpoint + best_fitness = 0.0 + start_epoch = ckpt['epoch'] + 1 + if ckpt['optimizer'] is not None: + optimizer.load_state_dict(ckpt['optimizer']) # optimizer + best_fitness = ckpt['best_fitness'] + if ema and ckpt.get('ema'): + ema.ema.load_state_dict(ckpt['ema'].float().state_dict()) # EMA + ema.updates = ckpt['updates'] + if resume: + assert start_epoch > 0, f'{weights} training to {epochs} epochs is finished, nothing to resume.\n' \ + f"Start a new training without --resume, i.e. 'python train.py --weights {weights}'" + LOGGER.info(f'Resuming training from {weights} from epoch {start_epoch} to {epochs} total epochs') + if epochs < start_epoch: + LOGGER.info(f"{weights} has been trained for {ckpt['epoch']} epochs. Fine-tuning for {epochs} more epochs.") + epochs += ckpt['epoch'] # finetune additional epochs + return best_fitness, start_epoch, epochs + + +class EarlyStopping: + # YOLOv5 simple early stopper + def __init__(self, patience=30): + self.best_fitness = 0.0 # i.e. mAP + self.best_epoch = 0 + self.patience = patience or float('inf') # epochs to wait after fitness stops improving to stop + self.possible_stop = False # possible stop may occur next epoch + + def __call__(self, epoch, fitness): + if fitness >= self.best_fitness: # >= 0 to allow for early zero-fitness stage of training + self.best_epoch = epoch + self.best_fitness = fitness + delta = epoch - self.best_epoch # epochs without improvement + self.possible_stop = delta >= (self.patience - 1) # possible stop may occur next epoch + stop = delta >= self.patience # stop training if patience exceeded + if stop: + LOGGER.info(f'Stopping training early as no improvement observed in last {self.patience} epochs. ' + f'Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n' + f'To update EarlyStopping(patience={self.patience}) pass a new patience value, ' + f'i.e. `python train.py --patience 300` or use `--patience 0` to disable EarlyStopping.') + return stop + + +class ModelEMA: + """ Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models + Keeps a moving average of everything in the model state_dict (parameters and buffers) + For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage + """ + + def __init__(self, model, decay=0.9999, tau=2000, updates=0): + # Create EMA + self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA + self.updates = updates # number of EMA updates + self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs) + for p in self.ema.parameters(): + p.requires_grad_(False) + + def update(self, model): + # Update EMA parameters + self.updates += 1 + d = self.decay(self.updates) + + msd = de_parallel(model).state_dict() # model state_dict + for k, v in self.ema.state_dict().items(): + if v.dtype.is_floating_point: # true for FP16 and FP32 + v *= d + v += (1 - d) * msd[k].detach() + # assert v.dtype == msd[k].dtype == torch.float32, f'{k}: EMA {v.dtype} and model {msd[k].dtype} must be FP32' + + def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): + # Update EMA attributes + copy_attr(self.ema, model, include, exclude) diff --git a/utils/triton.py b/utils/triton.py new file mode 100644 index 0000000000000000000000000000000000000000..bf09797cf02335e7bd6da7ac8c451979a6073f5d --- /dev/null +++ b/utils/triton.py @@ -0,0 +1,81 @@ +import typing +from urllib.parse import urlparse + +import torch + + +class TritonRemoteModel: + """ A wrapper over a model served by the Triton Inference Server. It can + be configured to communicate over GRPC or HTTP. It accepts Torch Tensors + as input and returns them as outputs. + """ + + def __init__(self, url: str): + """ + Keyword arguments: + url: Fully qualified address of the Triton server - for e.g. grpc://localhost:8000 + """ + + parsed_url = urlparse(url) + if parsed_url.scheme == "grpc": + from tritonclient.grpc import InferenceServerClient, InferInput + + self.client = InferenceServerClient(parsed_url.netloc) # Triton GRPC client + model_repository = self.client.get_model_repository_index() + self.model_name = model_repository.models[0].name + self.metadata = self.client.get_model_metadata(self.model_name, as_json=True) + + def create_input_placeholders() -> typing.List[InferInput]: + return [ + InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']] + + else: + from tritonclient.http import InferenceServerClient, InferInput + + self.client = InferenceServerClient(parsed_url.netloc) # Triton HTTP client + model_repository = self.client.get_model_repository_index() + self.model_name = model_repository[0]['name'] + self.metadata = self.client.get_model_metadata(self.model_name) + + def create_input_placeholders() -> typing.List[InferInput]: + return [ + InferInput(i['name'], [int(s) for s in i["shape"]], i['datatype']) for i in self.metadata['inputs']] + + self._create_input_placeholders_fn = create_input_placeholders + + @property + def runtime(self): + """Returns the model runtime""" + return self.metadata.get("backend", self.metadata.get("platform")) + + def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typing.Tuple[torch.Tensor, ...]]: + """ Invokes the model. Parameters can be provided via args or kwargs. + args, if provided, are assumed to match the order of inputs of the model. + kwargs are matched with the model input names. + """ + inputs = self._create_inputs(*args, **kwargs) + response = self.client.infer(model_name=self.model_name, inputs=inputs) + result = [] + for output in self.metadata['outputs']: + tensor = torch.as_tensor(response.as_numpy(output['name'])) + result.append(tensor) + return result[0] if len(result) == 1 else result + + def _create_inputs(self, *args, **kwargs): + args_len, kwargs_len = len(args), len(kwargs) + if not args_len and not kwargs_len: + raise RuntimeError("No inputs provided.") + if args_len and kwargs_len: + raise RuntimeError("Cannot specify args and kwargs at the same time") + + placeholders = self._create_input_placeholders_fn() + if args_len: + if args_len != len(placeholders): + raise RuntimeError(f"Expected {len(placeholders)} inputs, got {args_len}.") + for input, value in zip(placeholders, args): + input.set_data_from_numpy(value.cpu().numpy()) + else: + for input in placeholders: + value = kwargs[input.name] + input.set_data_from_numpy(value.cpu().numpy()) + return placeholders diff --git a/weights/best.pt b/weights/best.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a43e27b6f62fac24d825aa4f3abb8c93afb2fd8 --- /dev/null +++ b/weights/best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803a3dc6fbeae49ddb52ae33612d5c09e4c96bd3bfaf9cbfebad606cc667a690 +size 204631526