PKaushik committed on
Commit
0c7c723
1 Parent(s): 6d070d6
Files changed (1) hide show
  1. yolov6/models/end2end.py +152 -0
yolov6/models/end2end.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import random
4
+
5
+
6
class ORT_NMS(torch.autograd.Function):
    '''Stand-in for the ONNX ``NonMaxSuppression`` operator.

    ``forward`` does not perform any suppression: it fabricates a randomly
    sized tensor of index triples so that code calling it sees an output of
    the right layout. ``symbolic`` is what emits the ``NonMaxSuppression``
    node into the graph.
    '''

    @staticmethod
    def forward(ctx,
                boxes,
                scores,
                max_output_boxes_per_class=torch.tensor([100]),
                iou_threshold=torch.tensor([0.45]),
                score_threshold=torch.tensor([0.25])):
        '''Return a dummy ``(num_det, 3)`` int64 tensor of index triples.

        Each row is ``[batch_index, 0, box_index]``. ``num_det`` is drawn
        uniformly from 0..100 (inclusive); batch indices are random but
        sorted, and box indices are simply ``100, 101, ...``. The threshold
        arguments are accepted for signature parity with ``symbolic`` and are
        not read here.
        '''
        target = boxes.device
        n_batch = scores.shape[0]
        count = random.randint(0, 100)

        # One column per field of the index triple.
        batch_col = torch.randint(0, n_batch, (count,)).sort()[0].to(target)
        box_col = torch.arange(100, 100 + count).to(target)
        class_col = torch.zeros((count,), dtype=torch.int64).to(target)

        # Stack as rows (3, count), then flip to (count, 3).
        stacked = torch.cat([batch_col[None], class_col[None], box_col[None]], 0)
        return stacked.T.contiguous().to(torch.int64)

    @staticmethod
    def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold):
        '''Emit a ``NonMaxSuppression`` node taking the five inputs in order.'''
        nms_inputs = (boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold)
        return g.op("NonMaxSuppression", *nms_inputs)
28
+
29
+
30
class TRT_NMS(torch.autograd.Function):
    '''Stand-in for the ``TRT::EfficientNMS_TRT`` plugin node.

    ``forward`` only produces random tensors with the four output layouts;
    ``symbolic`` is what writes the plugin node and its attributes into the
    graph.
    '''

    @staticmethod
    def forward(
        ctx,
        boxes,
        scores,
        background_class=-1,
        box_coding=1,
        iou_threshold=0.45,
        max_output_boxes=100,
        plugin_version="1",
        score_activation=0,
        score_threshold=0.25,
    ):
        '''Return random ``(num_det, det_boxes, det_scores, det_classes)``.

        ``scores`` must be 3-D; its first and last sizes are read as the batch
        size and the number of classes. Output shapes are ``(B, 1)`` int32,
        ``(B, max_output_boxes, 4)``, ``(B, max_output_boxes)`` and
        ``(B, max_output_boxes)`` int32. All tensors are created on the
        default device; ``boxes`` and the remaining arguments are not read.
        '''
        n_batch, _, n_classes = scores.shape
        per_image = (n_batch, max_output_boxes)

        # The draws below are kept in this order so that seeded runs match.
        count = torch.randint(0, max_output_boxes, (n_batch, 1), dtype=torch.int32)
        kept_boxes = torch.randn(*per_image, 4)
        kept_scores = torch.randn(*per_image)
        kept_classes = torch.randint(0, n_classes, per_image, dtype=torch.int32)
        return count, kept_boxes, kept_scores, kept_classes

    @staticmethod
    def symbolic(g,
                 boxes,
                 scores,
                 background_class=-1,
                 box_coding=1,
                 iou_threshold=0.45,
                 max_output_boxes=100,
                 plugin_version="1",
                 score_activation=0,
                 score_threshold=0.25):
        '''Emit a four-output ``TRT::EfficientNMS_TRT`` node.

        The keyword suffixes (``_i``, ``_f``, ``_s``) tag each node attribute
        as int, float or string.
        '''
        plugin_attrs = dict(
            background_class_i=background_class,
            box_coding_i=box_coding,
            iou_threshold_f=iou_threshold,
            max_output_boxes_i=max_output_boxes,
            plugin_version_s=plugin_version,
            score_activation_i=score_activation,
            score_threshold_f=score_threshold,
        )
        node_outputs = g.op("TRT::EfficientNMS_TRT", boxes, scores, outputs=4, **plugin_attrs)
        count, kept_boxes, kept_scores, kept_classes = node_outputs
        return count, kept_boxes, kept_scores, kept_classes
76
+
77
+
78
class ONNX_ORT(nn.Module):
    '''onnx module with ONNX-Runtime NMS operation.

    Post-processes raw detector output of shape ``(batch, boxes, 5 + classes)``
    (four box values, one confidence, then per-class scores, as implied by the
    slicing in ``forward``) through ``ORT_NMS`` and gathers the kept rows.
    '''

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=640, device=None):
        '''Store NMS parameters as one-element tensors on the chosen device.

        Args:
            max_obj: value passed to NMS as ``max_output_boxes_per_class``.
            iou_thres: IoU threshold passed to NMS.
            score_thres: score threshold passed to NMS.
            max_wh: per-class coordinate offset used in ``forward``.
            device: target device; falls back to CPU when falsy.
        '''
        super().__init__()
        self.device = device if device else torch.device("cpu")
        # Use the resolved device (never None) for every tensor so they all
        # land in the same place as ``convert_matrix``.
        self.max_obj = torch.tensor([max_obj]).to(self.device)
        self.iou_threshold = torch.tensor([iou_thres]).to(self.device)
        self.score_threshold = torch.tensor([score_thres]).to(self.device)
        self.max_wh = max_wh
        # Right-multiplying [cx, cy, w, h] by this matrix yields
        # [cx - w/2, cy - h/2, cx + w/2, cy + h/2].
        self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]],
                                           dtype=torch.float32,
                                           device=self.device)

    def forward(self, x):
        '''Run NMS on ``x`` and return one row per kept detection.

        Returns:
            Tensor of shape ``(num_det, 7)``: batch index, four corner
            coordinates, class index, score. ``x`` itself is left unmodified.
        '''
        box = x[:, :, :4]
        conf = x[:, :, 4:5]
        # Out-of-place multiply: ``score *= conf`` would write through the
        # slice view into the caller's tensor.
        score = x[:, :, 5:] * conf
        box = box @ self.convert_matrix
        obj_score, obj_cls = score.max(2, keepdim=True)
        # Shift every box by class_index * max_wh so boxes of different
        # classes are moved apart before the single NMS call.
        class_offset = obj_cls.float() * self.max_wh
        nms_box = box + class_offset
        nms_score = obj_score.transpose(1, 2).contiguous()
        selected_indices = ORT_NMS.apply(nms_box, nms_score, self.max_obj, self.iou_threshold, self.score_threshold)
        # Column 0 is the batch index, column 2 the box index.
        batch_idx, box_idx = selected_indices[:, 0], selected_indices[:, 2]
        res_boxes = box[batch_idx, box_idx, :]
        res_classes = obj_cls[batch_idx, box_idx, :].float()
        res_scores = obj_score[batch_idx, box_idx, :]
        batch_col = batch_idx.unsqueeze(1).float()
        return torch.cat([batch_col, res_boxes, res_classes, res_scores], 1)
108
+
109
class ONNX_TRT(nn.Module):
    '''onnx module with TensorRT NMS operation.

    Splits raw detector output of shape ``(batch, boxes, 5 + classes)`` (four
    box values, one confidence, then per-class scores, as implied by the
    slicing in ``forward``) and hands it to ``TRT_NMS``.
    '''

    def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None):
        '''Store the plugin attributes passed to ``TRT_NMS``.

        Args:
            max_obj: passed to the plugin as ``max_output_boxes``.
            iou_thres: IoU threshold passed to the plugin.
            score_thres: score threshold passed to the plugin.
            max_wh: must be ``None``; accepted only so the signature matches
                ``ONNX_ORT``.
            device: stored on the module; falls back to CPU when falsy.

        Raises:
            AssertionError: if ``max_wh`` is not ``None``.
        '''
        super().__init__()
        assert max_wh is None, "ONNX_TRT does not use max_wh; it must be None"
        self.device = device if device else torch.device('cpu')
        # Plain ints: a trailing comma here would silently store 1-tuples and
        # change the type of the exported node attributes.
        self.background_class = -1
        # NOTE(review): 1 presumably selects center-size box coding in the
        # EfficientNMS plugin - confirm against the plugin documentation.
        self.box_coding = 1
        self.iou_threshold = iou_thres
        self.max_obj = max_obj
        self.plugin_version = '1'
        self.score_activation = 0
        self.score_threshold = score_thres

    def forward(self, x):
        '''Return ``(num_det, det_boxes, det_scores, det_classes)`` from ``TRT_NMS``.

        ``x`` itself is left unmodified.
        '''
        box = x[:, :, :4]
        conf = x[:, :, 4:5]
        # Out-of-place multiply: ``score *= conf`` would write through the
        # slice view into the caller's tensor.
        score = x[:, :, 5:] * conf
        num_det, det_boxes, det_scores, det_classes = TRT_NMS.apply(box, score, self.background_class, self.box_coding,
                                                                    self.iou_threshold, self.max_obj,
                                                                    self.plugin_version, self.score_activation,
                                                                    self.score_threshold)
        return num_det, det_boxes, det_scores, det_classes
133
+
134
+
135
class End2End(nn.Module):
    '''Wrap a detector together with an NMS head for export.

    The head is ``ONNX_TRT`` when ``max_wh`` is ``None`` and ``ONNX_ORT``
    otherwise.
    '''

    def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None, with_preprocess=False):
        '''Move ``model`` to ``device`` and build the matching NMS head.

        Args:
            model: detector whose output is fed to the NMS head.
            max_obj, iou_thres, score_thres, max_wh: forwarded positionally
                to the NMS head constructor.
            device: target device; falls back to CPU when falsy.
            with_preprocess: when true, ``forward`` reorders the channels and
                rescales the input before calling the model.
        '''
        super().__init__()
        if not device:
            device = torch.device('cpu')
        self.with_preprocess = with_preprocess
        self.model = model.to(device)
        # The presence of max_wh decides which NMS head is used.
        if max_wh is None:
            self.patch_model = ONNX_TRT
        else:
            self.patch_model = ONNX_ORT
        self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device)
        self.end2end.eval()

    def forward(self, x):
        '''Optionally preprocess ``x``, then run the model followed by the NMS head.'''
        if self.with_preprocess:
            # Reverse the first three channels along dim 1, then scale by 1/255.
            x = x[:, [2, 1, 0], ...]
            x = x * (1/255)
        return self.end2end(self.model(x))