|
import math |
|
|
|
import numpy as np |
|
import torch |
|
import torch.nn as nn |
|
|
|
from utils.downloads import attempt_download |
|
|
|
|
|
class Sum(nn.Module):
    """Add up ``n`` inputs, optionally scaling every input after the first."""

    def __init__(self, n, weight=False):
        """Configure the layer.

        Args:
            n: Number of inputs ``forward`` combines.
            weight: If truthy, create one learnable weight per input after
                the first one.
        """
        super().__init__()
        self.weight = weight
        self.iter = range(n - 1)  # offsets of the inputs added onto x[0]
        if weight:
            # Raw parameters start at -0.5, -1.0, ...; forward squashes them
            # with a sigmoid and doubles them, so each scale lies in (0, 2).
            self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)

    def forward(self, x):
        """Return ``x[0]`` plus the (optionally weighted) ``x[1]`` .. ``x[n - 1]``.

        Args:
            x: Indexable collection holding at least ``n`` addable items.
        """
        total = x[0]
        if not self.weight:
            for idx in self.iter:
                total = total + x[idx + 1]
            return total

        scale = torch.sigmoid(self.w) * 2
        for idx in self.iter:
            total = total + x[idx + 1] * scale[idx]
        return total
|
|
|
|
|
class MixConv2d(nn.Module):
    """Parallel grouped convolutions with different kernel sizes, concatenated.

    Each kernel size in ``k`` gets its own ``nn.Conv2d`` branch. The branch
    outputs are concatenated along dim 1, batch-normalised and passed
    through SiLU.
    """

    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        """Build one convolution branch per kernel size.

        Args:
            c1: Input channels.
            c2: Total output channels across all branches.
            k: Kernel size of each branch.
            s: Stride shared by all branches.
            equal_ch: If true, split ``c2`` into near-equal channel counts.
                Otherwise solve a least-squares system in which the branch
                widths sum to ``c2`` and ``kernel ** 2 * width`` is the same
                for every branch.
        """
        super().__init__()
        n_branches = len(k)
        if equal_ch:
            # Map every output-channel index to a branch id, then count them.
            branch_of = torch.linspace(0, n_branches - 1E-6, c2).floor()
            widths = [(branch_of == g).sum() for g in range(n_branches)]
        else:
            rhs = [c2] + [0] * n_branches
            coeffs = np.eye(n_branches + 1, n_branches, k=-1)
            coeffs -= np.roll(coeffs, 1, axis=1)
            coeffs *= np.array(k) ** 2
            coeffs[0] = 1  # first equation: the widths add up to c2
            widths = np.linalg.lstsq(coeffs, rhs, rcond=None)[0].round()

        branches = []
        for ksize, width in zip(k, widths):
            width = int(width)
            branches.append(
                nn.Conv2d(c1, width, ksize, s, ksize // 2, groups=math.gcd(c1, width), bias=False))
        self.m = nn.ModuleList(branches)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU()

    def forward(self, x):
        """Run every branch on ``x`` and return the activated, normalised concat."""
        mixed = torch.cat([conv(x) for conv in self.m], 1)
        return self.act(self.bn(mixed))
|
|
|
|
|
class Ensemble(nn.ModuleList):
    """Module list whose members' first outputs are concatenated along dim 1."""

    def __init__(self):
        super().__init__()

    def forward(self, x, augment=False, profile=False, visualize=False):
        """Call every member on ``x`` and join the results.

        Each member is called as ``member(x, augment, profile, visualize)``
        and only element ``[0]`` of its return value is kept.

        Returns:
            A ``(tensor, None)`` pair; the tensor is the dim-1 concatenation
            of the kept outputs.
        """
        outputs = []
        for member in self:
            outputs.append(member(x, augment, profile, visualize)[0])
        return torch.cat(outputs, 1), None
|
|
|
|
|
class ORT_NMS(torch.autograd.Function):
    """ONNX-Runtime NMS operation.

    ``forward`` does not perform suppression: it fabricates a random index
    tensor with three columns so that code consuming the result can run in
    eager mode. ``symbolic`` emits the ``NonMaxSuppression`` node used when
    the graph is exported.
    """

    @staticmethod
    def forward(ctx,
                boxes,
                scores,
                max_output_boxes_per_class=torch.tensor([100]),
                iou_threshold=torch.tensor([0.45]),
                score_threshold=torch.tensor([0.25])):
        """Return a placeholder ``(num_det, 3)`` int64 tensor of selected indices.

        Columns are ``[batch index, 0, box index]``. ``num_det`` is drawn
        uniformly from 0..100, batch indices are random but sorted, and box
        indices count up from 100. The threshold arguments are accepted only
        so the signature lines up with ``symbolic``; they are not read here.
        """
        # The module's import block does not import `random`; import it here
        # so this placeholder does not fail with NameError.
        import random

        device = boxes.device
        batch = scores.shape[0]
        num_det = random.randint(0, 100)
        batches = torch.randint(0, batch, (num_det,)).sort()[0].to(device)
        idxs = torch.arange(100, 100 + num_det).to(device)
        zeros = torch.zeros((num_det,), dtype=torch.int64).to(device)
        selected_indices = torch.cat([batches[None], zeros[None], idxs[None]], 0).T.contiguous()
        return selected_indices.to(torch.int64)

    @staticmethod
    def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold):
        """Emit the ``NonMaxSuppression`` graph op with the five inputs in order."""
        return g.op("NonMaxSuppression", boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold)
|
|
|
|
|
class TRT_NMS(torch.autograd.Function):
    """TensorRT NMS operation.

    ``forward`` only fabricates random outputs of the right shapes and
    dtypes; ``symbolic`` emits the ``TRT::EfficientNMS_TRT`` node with four
    outputs.
    """

    @staticmethod
    def forward(
        ctx,
        boxes,
        scores,
        background_class=-1,
        box_coding=1,
        iou_threshold=0.45,
        max_output_boxes=100,
        plugin_version="1",
        score_activation=0,
        score_threshold=0.25,
    ):
        """Return random placeholder detections.

        Only the shape of ``scores`` (three dimensions, unpacked as batch,
        boxes, classes) and ``max_output_boxes`` are read. The tensors are
        created without an explicit device.

        Returns:
            ``(num_det, det_boxes, det_scores, det_classes)`` with shapes
            ``(B, 1)``, ``(B, max_output_boxes, 4)``, ``(B, max_output_boxes)``
            and ``(B, max_output_boxes)``; the first and last are int32.
        """
        n_batch, _n_boxes, n_cls = scores.shape
        # Keep this creation order: it fixes how the RNG stream is consumed.
        counts = torch.randint(0, max_output_boxes, (n_batch, 1), dtype=torch.int32)
        out_boxes = torch.randn(n_batch, max_output_boxes, 4)
        out_scores = torch.randn(n_batch, max_output_boxes)
        out_classes = torch.randint(0, n_cls, (n_batch, max_output_boxes), dtype=torch.int32)
        return counts, out_boxes, out_scores, out_classes

    @staticmethod
    def symbolic(g,
                 boxes,
                 scores,
                 background_class=-1,
                 box_coding=1,
                 iou_threshold=0.45,
                 max_output_boxes=100,
                 plugin_version="1",
                 score_activation=0,
                 score_threshold=0.25):
        """Emit ``TRT::EfficientNMS_TRT`` and return its four outputs."""
        num_dets, det_boxes, det_scores, det_classes = g.op(
            "TRT::EfficientNMS_TRT",
            boxes,
            scores,
            background_class_i=background_class,
            box_coding_i=box_coding,
            iou_threshold_f=iou_threshold,
            max_output_boxes_i=max_output_boxes,
            plugin_version_s=plugin_version,
            score_activation_i=score_activation,
            score_threshold_f=score_threshold,
            outputs=4,
        )
        return num_dets, det_boxes, det_scores, det_classes
|
|
|
|
|
class ONNX_ORT(nn.Module):
    """onnx module with ONNX-Runtime NMS operation."""

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=640, device=None, n_classes=80):
        """Store the NMS settings as tensors on the target device.

        Args:
            max_obj: Value passed to NMS as ``max_output_boxes_per_class``.
            iou_thres: IoU threshold passed to NMS.
            score_thres: Score threshold passed to NMS.
            max_wh: Offset multiplied by the class id and added to the boxes
                so that boxes of different classes do not overlap during NMS.
            device: Target device; a falsy value selects the CPU.
            n_classes: Stored on the instance; not read by ``forward``.
        """
        super().__init__()
        self.device = device if device else torch.device("cpu")
        # Use the resolved device (not the raw argument) so every tensor
        # lives on the same device as ``convert_matrix``.
        self.max_obj = torch.tensor([max_obj]).to(self.device)
        self.iou_threshold = torch.tensor([iou_thres]).to(self.device)
        self.score_threshold = torch.tensor([score_thres]).to(self.device)
        self.max_wh = max_wh
        # Right-multiplying [cx, cy, w, h] by this matrix gives
        # [cx - w/2, cy - h/2, cx + w/2, cy + h/2].
        self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]],
                                           dtype=torch.float32,
                                           device=self.device)
        self.n_classes = n_classes

    def forward(self, x):
        """Run NMS on raw predictions and gather the surviving detections.

        Args:
            x: Prediction tensor, or a list whose element ``[1]`` is that
                tensor. Assumed to be ``(batch, 4 + n_classes, n_boxes)`` with
                the four box values first -- TODO confirm against the
                detection head.

        Returns:
            Tensor with one row per selected box:
            ``[batch index, 4 converted box values, category id, score]``.
        """
        if isinstance(x, list):
            x = x[1]
        x = x.permute(0, 2, 1)
        # Keep the boxes 3-D, (batch, n_boxes, 4): that is the layout
        # NonMaxSuppression takes, and it keeps the class-offset add and the
        # final concatenation dimensionally consistent. An extra singleton
        # axis here would broadcast ``bboxes + dis`` to (batch, n, n, 4) and
        # make the gathered boxes 3-D, which torch.cat below rejects.
        bboxes = x[..., :4] @ self.convert_matrix
        scores = x[..., 4:]
        max_score, category_id = scores.max(2, keepdim=True)
        dis = category_id.float() * self.max_wh  # per-box class offset
        nmsbox = bboxes + dis
        max_score_tp = max_score.transpose(1, 2).contiguous()
        selected_indices = ORT_NMS.apply(nmsbox, max_score_tp, self.max_obj, self.iou_threshold, self.score_threshold)
        X, Y = selected_indices[:, 0], selected_indices[:, 2]  # batch index, box index
        selected_boxes = bboxes[X, Y, :]
        selected_categories = category_id[X, Y, :].float()
        selected_scores = max_score[X, Y, :]
        X = X.unsqueeze(1).float()
        return torch.cat([X, selected_boxes, selected_categories, selected_scores], 1)
|
|
|
|
|
class ONNX_TRT(nn.Module):
    """onnx module with TensorRT NMS operation."""

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None, n_classes=80):
        """Store the attributes forwarded to the TensorRT NMS op.

        Args:
            max_obj: Forwarded as ``max_output_boxes``.
            iou_thres: Forwarded as ``iou_threshold``.
            score_thres: Forwarded as ``score_threshold``.
            max_wh: Must be ``None``; present so the signature matches
                ``ONNX_ORT``.
            device: Stored on the instance; a falsy value selects the CPU.
            n_classes: Stored on the instance; not read by ``forward``.
        """
        super().__init__()
        assert max_wh is None
        self.device = device if device else torch.device('cpu')
        # Plain ints: a trailing comma here would store one-element tuples,
        # which then reach the op's integer ``_i`` attributes as sequences.
        self.background_class = -1
        self.box_coding = 1
        self.iou_threshold = iou_thres
        self.max_obj = max_obj
        self.plugin_version = '1'
        self.score_activation = 0
        self.score_threshold = score_thres
        self.n_classes = n_classes

    def forward(self, x):
        """Split raw predictions into boxes and scores and hand them to TRT NMS.

        Args:
            x: Prediction tensor, or a list whose element ``[1]`` is that
                tensor. Assumed to be ``(batch, 4 + n_classes, n_boxes)`` with
                the four box values first -- TODO confirm against the
                detection head.

        Returns:
            ``(num_det, det_boxes, det_scores, det_classes)`` exactly as
            produced by ``TRT_NMS``.
        """
        if isinstance(x, list):
            x = x[1]
        x = x.permute(0, 2, 1)
        # Boxes get a singleton axis before the 4 coordinates:
        # (batch, n_boxes, 4) -> (batch, n_boxes, 1, 4).
        bboxes = x[..., :4].unsqueeze(2)
        scores = x[..., 4:]
        num_det, det_boxes, det_scores, det_classes = TRT_NMS.apply(
            bboxes, scores, self.background_class, self.box_coding,
            self.iou_threshold, self.max_obj,
            self.plugin_version, self.score_activation,
            self.score_threshold)
        return num_det, det_boxes, det_scores, det_classes
|
|
|
class End2End(nn.Module):
    """export onnx or tensorrt model with NMS operation."""

    def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None, n_classes=80):
        """Wrap ``model`` and attach the matching NMS head.

        Args:
            model: Network to wrap; its ``model[-1]`` gets ``end2end = True``.
            max_obj: Forwarded to the NMS head.
            iou_thres: Forwarded to the NMS head.
            score_thres: Forwarded to the NMS head.
            max_wh: ``None`` selects ``ONNX_TRT``; an int selects ``ONNX_ORT``.
            device: Target device; a falsy value selects the CPU.
            n_classes: Forwarded to the NMS head.
        """
        super().__init__()
        device = device or torch.device('cpu')
        assert max_wh is None or isinstance(max_wh, int)
        self.model = model.to(device)
        self.model.model[-1].end2end = True
        if max_wh is None:
            self.patch_model = ONNX_TRT
        else:
            self.patch_model = ONNX_ORT
        self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device, n_classes)
        self.end2end.eval()

    def forward(self, x):
        """Run the wrapped model, then the NMS head, and return the head's output."""
        return self.end2end(self.model(x))
|
|
|
|
|
def attempt_load(weights, device=None, inplace=True, fuse=True):
    """Load one checkpoint, or several as an ``Ensemble``, in eval mode.

    Args:
        weights: A single weights reference or a list of them; each one is
            passed through ``attempt_download`` before loading.
        device: Device the loaded model is moved to.
        inplace: Value written to ``inplace`` on activation, ``Detect`` and
            ``Model`` modules.
        fuse: If true, call ``fuse()`` on models that provide it.

    Returns:
        The single loaded model when exactly one was loaded, otherwise the
        ``Ensemble`` with ``names``, ``nc``, ``yaml`` and ``stride`` set.
    """
    from models.yolo import Detect, Model

    model = Ensemble()
    sources = weights if isinstance(weights, list) else [weights]
    for source in sources:
        # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
        checkpoint = torch.load(attempt_download(source), map_location='cpu')
        net = (checkpoint.get('ema') or checkpoint['model']).to(device).float()

        # Fill in attributes that the checkpoint may be missing.
        if not hasattr(net, 'stride'):
            net.stride = torch.tensor([32.])
        if hasattr(net, 'names') and isinstance(net.names, (list, tuple)):
            net.names = dict(enumerate(net.names))

        if fuse and hasattr(net, 'fuse'):
            net = net.fuse()
        model.append(net.eval())

    # Patch per-module attributes; the match is on the exact type.
    inplace_types = (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model)
    for m in model.modules():
        kind = type(m)
        if kind in inplace_types:
            m.inplace = inplace
        elif kind is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
            m.recompute_scale_factor = None

    if len(model) == 1:
        return model[-1]

    print(f'Ensemble created with {weights}\n')
    for attr in ('names', 'nc', 'yaml'):
        setattr(model, attr, getattr(model[0], attr))
    # Take the stride of the member with the largest maximum stride.
    max_strides = torch.tensor([m.stride.max() for m in model])
    model.stride = model[torch.argmax(max_strides).int()].stride
    assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
    return model
|
|