AlshimaaGamalAlsaied committed on
Commit
8a41b66
1 Parent(s): 1f063cf
app.py CHANGED
@@ -1,78 +1,6 @@
1
- # import gradio as gr
2
- # #import torch
3
- # import yolov7
4
-
5
-
6
- # #
7
- # # from huggingface_hub import hf_hub_download
8
- # from huggingface_hub import HfApi
9
-
10
-
11
- # # Images
12
- # #torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg', 'zidane.jpg')
13
- # #torch.hub.download_url_to_file('https://raw.githubusercontent.com/obss/sahi/main/tests/data/small-vehicles1.jpeg', 'small-vehicles1.jpeg')
14
-
15
- # def yolov7_inference(
16
- # image: gr.inputs.Image = None,
17
- # model_path: gr.inputs.Dropdown = None,
18
- # image_size: gr.inputs.Slider = 640,
19
- # conf_threshold: gr.inputs.Slider = 0.25,
20
- # iou_threshold: gr.inputs.Slider = 0.45,
21
- # ):
22
- # """
23
- # YOLOv7 inference function
24
- # Args:
25
- # image: Input image
26
- # model_path: Path to the model
27
- # image_size: Image size
28
- # conf_threshold: Confidence threshold
29
- # iou_threshold: IOU threshold
30
- # Returns:
31
- # Rendered image
32
- # """
33
-
34
- # model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
35
- # model.conf = conf_threshold
36
- # model.iou = iou_threshold
37
- # results = model([image], size=image_size)
38
- # return results.render()[0]
39
-
40
-
41
-
42
- # inputs = [
43
- # gr.inputs.Image(type="pil", label="Input Image"),
44
- # gr.inputs.Dropdown(
45
- # choices=[
46
- # "alshimaa/model_baseline",
47
- # "alshimaa/model_yolo7",
48
- # #"kadirnar/yolov7-v0.1",
49
- # ],
50
- # default="alshimaa/model_baseline",
51
- # label="Model",
52
- # )
53
- # #gr.inputs.Slider(minimum=320, maximum=1280, default=640, step=32, label="Image Size")
54
- # #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.25, step=0.05, label="Confidence Threshold"),
55
- # #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.45, step=0.05, label="IOU Threshold")
56
- # ]
57
-
58
- # outputs = gr.outputs.Image(type="filepath", label="Output Image")
59
- # title = "Smart Environmental Eye (SEE)"
60
-
61
- # examples = [['image1.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image2.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image3.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45]]
62
- # demo_app = gr.Interface(
63
- # fn=yolov7_inference,
64
- # inputs=inputs,
65
- # outputs=outputs,
66
- # title=title,
67
- # examples=examples,
68
- # cache_examples=True,
69
- # theme='huggingface',
70
- # )
71
-
72
- # demo_app.launch(debug=True, enable_queue=True)
73
-
74
-
75
-
76
  import subprocess
77
  import tempfile
78
  import time
@@ -81,16 +9,41 @@ from pathlib import Path
81
  import cv2
82
  import gradio as gr
83
 
84
- from inferer import Inferer
85
-
86
- pipeline = Inferer("alshimaa/model_yolo7", device='cuda')
87
-
88
-
89
- def fn_image(image, conf_thres, iou_thres):
90
- return pipeline(image, conf_thres, iou_thres)
91
 
92
 
93
- def fn_video(video_file, conf_thres, iou_thres, start_sec, duration):
94
  start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))
95
  end_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec + duration))
96
 
@@ -121,7 +74,7 @@ def fn_video(video_file, conf_thres, iou_thres, start_sec, duration):
121
  print(e)
122
  continue
123
  print("FRAME DTYPE", type(frame))
124
- out.write(pipeline(frame, conf_thres, iou_thres))
125
  num_frames += 1
126
  print("Processed {} frames".format(num_frames))
127
  if num_frames == max_frames:
@@ -135,29 +88,44 @@ def fn_video(video_file, conf_thres, iou_thres, start_sec, duration):
135
 
136
  return out_file.name
137
 
138
-
139
  image_interface = gr.Interface(
140
- fn=fn_image,
141
  inputs=[
142
- "image",
143
- gr.Slider(0, 1, value=0.5, label="Confidence Threshold"),
144
- gr.Slider(0, 1, value=0.5, label="IOU Threshold"),
145
- ],
146
- outputs=gr.Image(type="file"),
147
- examples=[["image1.jpg", 0.5, 0.5], ["image2.jpg", 0.25, 0.45], ["image3.jpg", 0.25, 0.45]],
148
  title="Smart Environmental Eye (SEE)",
149
- allow_flagging=False,
150
- allow_screenshot=False,
 
151
  )
152
 
 
153
  video_interface = gr.Interface(
154
- fn=fn_video,
155
  inputs=[
156
  gr.Video(type="file"),
157
- gr.Slider(0, 1, value=0.25, label="Confidence Threshold"),
158
- gr.Slider(0, 1, value=0.45, label="IOU Threshold"),
159
- gr.Slider(0, 10, value=0, label="Start Second", step=1),
160
- gr.Slider(0, 10 if pipeline.device.type != 'cpu' else 3, value=4, label="Duration", step=1),
161
  ],
162
  outputs=gr.Video(type="file", format="mp4"),
163
  # examples=[
@@ -169,11 +137,113 @@ video_interface = gr.Interface(
169
  allow_screenshot=False,
170
  )
171
 
172
-
173
-
174
  if __name__ == "__main__":
175
  gr.TabbedInterface(
176
  [image_interface, video_interface],
177
  ["Run on Images", "Run on Videos"],
178
  ).launch()
179
 
1
+ import gradio as gr
2
+ #import torch
3
+ import yolov7
4
  import subprocess
5
  import tempfile
6
  import time
 
9
  import cv2
10
  import gradio as gr
11
 
12
 
13
 
14
+ # Images
15
+ #torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg', 'zidane.jpg')
16
+ #torch.hub.download_url_to_file('https://raw.githubusercontent.com/obss/sahi/main/tests/data/small-vehicles1.jpeg', 'small-vehicles1.jpeg')
17
+
18
+ def image_fn(
19
+ image: gr.inputs.Image = None,
20
+ model_path: gr.inputs.Dropdown = None,
21
+ image_size: gr.inputs.Slider = 640,
22
+ conf_threshold: gr.inputs.Slider = 0.25,
23
+ iou_threshold: gr.inputs.Slider = 0.45,
24
+ ):
25
+ """
26
+ YOLOv7 inference function
27
+ Args:
28
+ image: Input image
29
+ model_path: Path to the model
30
+ image_size: Image size
31
+ conf_threshold: Confidence threshold
32
+ iou_threshold: IOU threshold
33
+ Returns:
34
+ Rendered image
35
+ """
36
+
37
+ model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
38
+ model.conf = conf_threshold
39
+ model.iou = iou_threshold
40
+ results = model([image], size=image_size)
41
+ return results.render()[0]
42
+
43
+
44
+
45
+ def video_fn(model_path, video_file, conf_thres, iou_thres, start_sec, duration):
46
+ model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
47
  start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))
48
  end_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec + duration))
49
 
 
74
  print(e)
75
  continue
76
  print("FRAME DTYPE", type(frame))
77
+ out.write(model(frame, conf_thres, iou_thres))
78
  num_frames += 1
79
  print("Processed {} frames".format(num_frames))
80
  if num_frames == max_frames:
 
88
 
89
  return out_file.name
90
 
 
91
  image_interface = gr.Interface(
92
+ fn=image_fn,
93
  inputs=[
94
+ gr.inputs.Image(type="pil", label="Input Image"),
95
+ gr.inputs.Dropdown(
96
+ choices=[
97
+ "alshimaa/model_baseline",
98
+ "alshimaa/model_yolo7",
99
+ #"kadirnar/yolov7-v0.1",
100
+ ],
101
+ default="alshimaa/model_baseline",
102
+ label="Model",
103
+ )
104
+ #gr.inputs.Slider(minimum=320, maximum=1280, default=640, step=32, label="Image Size")
105
+ #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.25, step=0.05, label="Confidence Threshold"),
106
+ #gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.45, step=0.05, label="IOU Threshold")
107
+ ],
108
+ outputs=gr.outputs.Image(type="filepath", label="Output Image"),
109
  title="Smart Environmental Eye (SEE)",
110
+ examples=[['image1.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image2.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45], ['image3.jpg', 'alshimaa/model_yolo7', 640, 0.25, 0.45]],
111
+ cache_examples=True,
112
+ theme='huggingface',
113
  )
114
 
115
+
116
  video_interface = gr.Interface(
117
+ fn=video_fn,
118
  inputs=[
119
  gr.Video(type="file"),
120
+ gr.inputs.Dropdown(
121
+ choices=[
122
+ "alshimaa/model_baseline",
123
+ "alshimaa/model_yolo7",
124
+ #"kadirnar/yolov7-v0.1",
125
+ ],
126
+ default="alshimaa/model_baseline",
127
+ label="Model",
128
+ ),
129
  ],
130
  outputs=gr.Video(type="file", format="mp4"),
131
  # examples=[
 
137
  allow_screenshot=False,
138
  )
139
 
 
 
140
  if __name__ == "__main__":
141
  gr.TabbedInterface(
142
  [image_interface, video_interface],
143
  ["Run on Images", "Run on Videos"],
144
  ).launch()
145
 
146
+ # import subprocess
147
+ # import tempfile
148
+ # import time
149
+ # from pathlib import Path
150
+
151
+ # import cv2
152
+ # import gradio as gr
153
+
154
+ # from inferer import Inferer
155
+
156
+ # pipeline = Inferer("alshimaa/model_yolo7", device='cuda')
157
+
158
+
159
+ # def fn_image(image, conf_thres, iou_thres):
160
+ # return pipeline(image, conf_thres, iou_thres)
161
+
162
+
163
+ # def fn_video(video_file, conf_thres, iou_thres, start_sec, duration):
164
+ # start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))
165
+ # end_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec + duration))
166
+
167
+ # suffix = Path(video_file).suffix
168
+
169
+ # clip_temp_file = tempfile.NamedTemporaryFile(suffix=suffix)
170
+ # subprocess.call(
171
+ # f"ffmpeg -y -ss {start_timestamp} -i {video_file} -to {end_timestamp} -c copy {clip_temp_file.name}".split()
172
+ # )
173
+
174
+ # # Reader of clip file
175
+ # cap = cv2.VideoCapture(clip_temp_file.name)
176
+
177
+ # # This is an intermediary temp file where we'll write the video to
178
+ # # Unfortunately, gradio doesn't play too nice with videos rn so we have to do some hackiness
179
+ # # with ffmpeg at the end of the function here.
180
+ # with tempfile.NamedTemporaryFile(suffix=".mp4") as temp_file:
181
+ # out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*"MP4V"), 30, (1280, 720))
182
+
183
+ # num_frames = 0
184
+ # max_frames = duration * 30
185
+ # while cap.isOpened():
186
+ # try:
187
+ # ret, frame = cap.read()
188
+ # if not ret:
189
+ # break
190
+ # except Exception as e:
191
+ # print(e)
192
+ # continue
193
+ # print("FRAME DTYPE", type(frame))
194
+ # out.write(pipeline(frame, conf_thres, iou_thres))
195
+ # num_frames += 1
196
+ # print("Processed {} frames".format(num_frames))
197
+ # if num_frames == max_frames:
198
+ # break
199
+
200
+ # out.release()
201
+
202
+ # # Aforementioned hackiness
203
+ # out_file = tempfile.NamedTemporaryFile(suffix="out.mp4", delete=False)
204
+ # subprocess.run(f"ffmpeg -y -loglevel quiet -stats -i {temp_file.name} -c:v libx264 {out_file.name}".split())
205
+
206
+ # return out_file.name
207
+
208
+
209
+ # image_interface = gr.Interface(
210
+ # fn=fn_image,
211
+ # inputs=[
212
+ # "image",
213
+ # gr.Slider(0, 1, value=0.5, label="Confidence Threshold"),
214
+ # gr.Slider(0, 1, value=0.5, label="IOU Threshold"),
215
+ # ],
216
+ # outputs=gr.Image(type="file"),
217
+ # examples=[["image1.jpg", 0.5, 0.5], ["image2.jpg", 0.25, 0.45], ["image3.jpg", 0.25, 0.45]],
218
+ # title="Smart Environmental Eye (SEE)",
219
+ # allow_flagging=False,
220
+ # allow_screenshot=False,
221
+ # )
222
+
223
+ # video_interface = gr.Interface(
224
+ # fn=fn_video,
225
+ # inputs=[
226
+ # gr.Video(type="file"),
227
+ # gr.Slider(0, 1, value=0.25, label="Confidence Threshold"),
228
+ # gr.Slider(0, 1, value=0.45, label="IOU Threshold"),
229
+ # gr.Slider(0, 10, value=0, label="Start Second", step=1),
230
+ # gr.Slider(0, 10 if pipeline.device.type != 'cpu' else 3, value=4, label="Duration", step=1),
231
+ # ],
232
+ # outputs=gr.Video(type="file", format="mp4"),
233
+ # # examples=[
234
+ # # ["video.mp4", 0.25, 0.45, 0, 2],
235
+
236
+ # # ],
237
+ # title="Smart Environmental Eye (SEE)",
238
+ # allow_flagging=False,
239
+ # allow_screenshot=False,
240
+ # )
241
+
242
+
243
+
244
+ # if __name__ == "__main__":
245
+ # gr.TabbedInterface(
246
+ # [image_interface, video_interface],
247
+ # ["Run on Images", "Run on Videos"],
248
+ # ).launch()
249
+
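
For reference outside the Gradio wiring, here is a minimal standalone sketch of the image path that the new `image_fn` implements, assuming the same `yolov7` pip package and Hub checkpoints imported above (model id, image file, and thresholds are the example values from the interface):

```python
# Minimal sketch of the image-inference path wrapped by image_fn above.
# Assumes the `yolov7` package exposing yolov7.load(..., hf_model=True) as used in app.py;
# the model id and image file are placeholders taken from the interface examples.
from PIL import Image
import yolov7

model = yolov7.load("alshimaa/model_baseline", device="cpu", hf_model=True, trace=False)
model.conf = 0.25  # confidence threshold
model.iou = 0.45   # IoU threshold for NMS

image = Image.open("image1.jpg")
results = model([image], size=640)  # batched inference, letterboxed to 640 px
rendered = results.render()[0]      # image with detections drawn, as returned by image_fn
```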
inferer.py DELETED
@@ -1,238 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import math
4
- import os.path as osp
5
-
6
- import cv2
7
- import numpy as np
8
- import torch
9
- from huggingface_hub import hf_hub_download
10
- from PIL import Image, ImageFont
11
-
12
- from yolov6.data.data_augment import letterbox
13
- from yolov6.layers.common import DetectBackend
14
- from yolov6.utils.events import LOGGER, load_yaml
15
- from yolov6.utils.nms import non_max_suppression
16
-
17
-
18
- class Inferer:
19
- def __init__(self, model_id, device="cpu", yaml="coco.yaml", img_size=640, half=False):
20
- self.__dict__.update(locals())
21
-
22
- # Init model
23
- self.img_size = img_size
24
- cuda = device != "cpu" and torch.cuda.is_available()
25
- self.device = torch.device("cuda:0" if cuda else "cpu")
26
- self.model = DetectBackend(hf_hub_download(model_id, "model.pt"), device=self.device)
27
- self.stride = self.model.stride
28
- self.class_names = load_yaml(yaml)["names"]
29
- self.img_size = self.check_img_size(self.img_size, s=self.stride) # check image size
30
-
31
- # Half precision
32
- if half & (self.device.type != "cpu"):
33
- self.model.model.half()
34
- else:
35
- self.model.model.float()
36
- half = False
37
-
38
- if self.device.type != "cpu":
39
- self.model(
40
- torch.zeros(1, 3, *self.img_size).to(self.device).type_as(next(self.model.model.parameters()))
41
- ) # warmup
42
-
43
- # Switch model to deploy status
44
- self.model_switch(self.model, self.img_size)
45
-
46
- def model_switch(self, model, img_size):
47
- """Model switch to deploy status"""
48
- from yolov6.layers.common import RepVGGBlock
49
-
50
- for layer in model.modules():
51
- if isinstance(layer, RepVGGBlock):
52
- layer.switch_to_deploy()
53
-
54
- LOGGER.info("Switch model to deploy modality.")
55
-
56
- def __call__(
57
- self,
58
- path_or_image,
59
- conf_thres=0.25,
60
- iou_thres=0.45,
61
- classes=None,
62
- agnostic_nms=False,
63
- max_det=1000,
64
- hide_labels=False,
65
- hide_conf=False,
66
- ):
67
- """Model Inference and results visualization"""
68
-
69
- img, img_src = self.precess_image(path_or_image, self.img_size, self.stride, self.half)
70
- img = img.to(self.device)
71
- if len(img.shape) == 3:
72
- img = img[None]
73
- # expand for batch dim
74
- pred_results = self.model(img)
75
- det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
76
-
77
- gn = torch.tensor(img_src.shape)[[1, 0, 1, 0]] # normalization gain whwh
78
- img_ori = img_src
79
-
80
- # check image and font
81
- assert (
82
- img_ori.data.contiguous
83
- ), "Image needs to be contiguous. Please apply to input images with np.ascontiguousarray(im)."
84
- self.font_check()
85
-
86
- if len(det):
87
- det[:, :4] = self.rescale(img.shape[2:], det[:, :4], img_src.shape).round()
88
-
89
- for *xyxy, conf, cls in reversed(det):
90
- class_num = int(cls) # integer class
91
- label = (
92
- None
93
- if hide_labels
94
- else (self.class_names[class_num] if hide_conf else f"{self.class_names[class_num]} {conf:.2f}")
95
- )
96
-
97
- self.plot_box_and_label(
98
- img_ori,
99
- max(round(sum(img_ori.shape) / 2 * 0.003), 2),
100
- xyxy,
101
- label,
102
- color=self.generate_colors(class_num, True),
103
- )
104
-
105
- img_src = np.asarray(img_ori)
106
-
107
- return img_src
108
-
109
- @staticmethod
110
- def precess_image(path_or_image, img_size, stride, half):
111
- """Process image before image inference."""
112
- if isinstance(path_or_image, str):
113
- try:
114
- img_src = cv2.imread(path_or_image)
115
- assert img_src is not None, f"Invalid image: {path_or_image}"
116
- except Exception as e:
117
- LOGGER.warning(e)
118
- elif isinstance(path_or_image, np.ndarray):
119
- img_src = path_or_image
120
- elif isinstance(path_or_image, Image.Image):
121
- img_src = np.array(path_or_image)
122
-
123
- image = letterbox(img_src, img_size, stride=stride)[0]
124
-
125
- # Convert
126
- image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
127
- image = torch.from_numpy(np.ascontiguousarray(image))
128
- image = image.half() if half else image.float() # uint8 to fp16/32
129
- image /= 255 # 0 - 255 to 0.0 - 1.0
130
-
131
- return image, img_src
132
-
133
- @staticmethod
134
- def rescale(ori_shape, boxes, target_shape):
135
- """Rescale the output to the original image shape"""
136
- ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
137
- padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2
138
-
139
- boxes[:, [0, 2]] -= padding[0]
140
- boxes[:, [1, 3]] -= padding[1]
141
- boxes[:, :4] /= ratio
142
-
143
- boxes[:, 0].clamp_(0, target_shape[1]) # x1
144
- boxes[:, 1].clamp_(0, target_shape[0]) # y1
145
- boxes[:, 2].clamp_(0, target_shape[1]) # x2
146
- boxes[:, 3].clamp_(0, target_shape[0]) # y2
147
-
148
- return boxes
149
-
150
- def check_img_size(self, img_size, s=32, floor=0):
151
- """Make sure image size is a multiple of stride s in each dimension, and return a new shape list of image."""
152
- if isinstance(img_size, int): # integer i.e. img_size=640
153
- new_size = max(self.make_divisible(img_size, int(s)), floor)
154
- elif isinstance(img_size, list): # list i.e. img_size=[640, 480]
155
- new_size = [max(self.make_divisible(x, int(s)), floor) for x in img_size]
156
- else:
157
- raise Exception(f"Unsupported type of img_size: {type(img_size)}")
158
-
159
- if new_size != img_size:
160
- print(f"WARNING: --img-size {img_size} must be multiple of max stride {s}, updating to {new_size}")
161
- return new_size if isinstance(img_size, list) else [new_size] * 2
162
-
163
- def make_divisible(self, x, divisor):
164
- # Upward revision the value x to make it evenly divisible by the divisor.
165
- return math.ceil(x / divisor) * divisor
166
-
167
- @staticmethod
168
- def plot_box_and_label(image, lw, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255)):
169
- # Add one xyxy box to image with label
170
- p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
171
- cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
172
- if label:
173
- tf = max(lw - 1, 1) # font thickness
174
- w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] # text width, height
175
- outside = p1[1] - h - 3 >= 0 # label fits outside box
176
- p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
177
- cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA) # filled
178
- cv2.putText(
179
- image,
180
- label,
181
- (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
182
- 0,
183
- lw / 3,
184
- txt_color,
185
- thickness=tf,
186
- lineType=cv2.LINE_AA,
187
- )
188
-
189
- @staticmethod
190
- def font_check(font="./yolov6/utils/Arial.ttf", size=10):
191
- # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary
192
- assert osp.exists(font), f"font path not exists: {font}"
193
- try:
194
- return ImageFont.truetype(str(font) if font.exists() else font.name, size)
195
- except Exception as e: # download if missing
196
- return ImageFont.truetype(str(font), size)
197
-
198
- @staticmethod
199
- def box_convert(x):
200
- # Convert boxes with shape [n, 4] from [x1, y1, x2, y2] to [x, y, w, h] where x1y1=top-left, x2y2=bottom-right
201
- y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
202
- y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
203
- y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
204
- y[:, 2] = x[:, 2] - x[:, 0] # width
205
- y[:, 3] = x[:, 3] - x[:, 1] # height
206
- return y
207
-
208
- @staticmethod
209
- def generate_colors(i, bgr=False):
210
- hex = (
211
- "FF3838",
212
- "FF9D97",
213
- "FF701F",
214
- "FFB21D",
215
- "CFD231",
216
- "48F90A",
217
- "92CC17",
218
- "3DDB86",
219
- "1A9334",
220
- "00D4BB",
221
- "2C99A8",
222
- "00C2FF",
223
- "344593",
224
- "6473FF",
225
- "0018EC",
226
- "8438FF",
227
- "520085",
228
- "CB38FF",
229
- "FF95C8",
230
- "FF37C7",
231
- )
232
- palette = []
233
- for iter in hex:
234
- h = "#" + iter
235
- palette.append(tuple(int(h[1 + i : 1 + i + 2], 16) for i in (0, 2, 4)))
236
- num = len(palette)
237
- color = palette[int(i) % num]
238
- return (color[2], color[1], color[0]) if bgr else color
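
The wrapper deleted above was consumed by the previous version of app.py roughly as follows (a sketch reconstructed from the old code paths shown commented out in this commit; model id and device are the values the old app used):

```python
# Sketch of how the removed Inferer was used before this commit
# (see the commented-out fn_image/fn_video in the old app.py above).
from inferer import Inferer

pipeline = Inferer("alshimaa/model_yolo7", device="cuda")

# __call__ accepts a path, a numpy array, or a PIL image plus thresholds,
# and returns the source image with boxes and class labels drawn on it.
annotated = pipeline("image1.jpg", conf_thres=0.25, iou_thres=0.45)
```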
 
yolov6/core/engine.py DELETED
@@ -1,273 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- import time
5
- from copy import deepcopy
6
- import os.path as osp
7
-
8
- from tqdm import tqdm
9
-
10
- import numpy as np
11
- import torch
12
- from torch.cuda import amp
13
- from torch.nn.parallel import DistributedDataParallel as DDP
14
- from torch.utils.tensorboard import SummaryWriter
15
-
16
- import tools.eval as eval
17
- from yolov6.data.data_load import create_dataloader
18
- from yolov6.models.yolo import build_model
19
- from yolov6.models.loss import ComputeLoss
20
- from yolov6.utils.events import LOGGER, NCOLS, load_yaml, write_tblog
21
- from yolov6.utils.ema import ModelEMA, de_parallel
22
- from yolov6.utils.checkpoint import load_state_dict, save_checkpoint, strip_optimizer
23
- from yolov6.solver.build import build_optimizer, build_lr_scheduler
24
-
25
- class Trainer:
26
- def __init__(self, args, cfg, device):
27
- self.args = args
28
- self.cfg = cfg
29
- self.device = device
30
-
31
- if args.resume:
32
- self.ckpt = torch.load(args.resume, map_location='cpu')
33
-
34
- self.rank = args.rank
35
- self.local_rank = args.local_rank
36
- self.world_size = args.world_size
37
- self.main_process = self.rank in [-1, 0]
38
- self.save_dir = args.save_dir
39
- # get data loader
40
- self.data_dict = load_yaml(args.data_path)
41
- self.num_classes = self.data_dict['nc']
42
- self.train_loader, self.val_loader = self.get_data_loader(args, cfg, self.data_dict)
43
- # get model and optimizer
44
- model = self.get_model(args, cfg, self.num_classes, device)
45
- self.optimizer = self.get_optimizer(args, cfg, model)
46
- self.scheduler, self.lf = self.get_lr_scheduler(args, cfg, self.optimizer)
47
- self.ema = ModelEMA(model) if self.main_process else None
48
- # tensorboard
49
- self.tblogger = SummaryWriter(self.save_dir) if self.main_process else None
50
- self.start_epoch = 0
51
- #resume
52
- if hasattr(self, "ckpt"):
53
- resume_state_dict = self.ckpt['model'].float().state_dict() # checkpoint state_dict as FP32
54
- model.load_state_dict(resume_state_dict, strict=True) # load
55
- self.start_epoch = self.ckpt['epoch'] + 1
56
- self.optimizer.load_state_dict(self.ckpt['optimizer'])
57
- if self.main_process:
58
- self.ema.ema.load_state_dict(self.ckpt['ema'].float().state_dict())
59
- self.ema.updates = self.ckpt['updates']
60
- self.model = self.parallel_model(args, model, device)
61
- self.model.nc, self.model.names = self.data_dict['nc'], self.data_dict['names']
62
-
63
- self.max_epoch = args.epochs
64
- self.max_stepnum = len(self.train_loader)
65
- self.batch_size = args.batch_size
66
- self.img_size = args.img_size
67
-
68
-
69
- # Training Process
70
-
71
- def train(self):
72
- try:
73
- self.train_before_loop()
74
- for self.epoch in range(self.start_epoch, self.max_epoch):
75
- self.train_in_loop()
76
-
77
- except Exception as _:
78
- LOGGER.error('ERROR in training loop or eval/save model.')
79
- raise
80
- finally:
81
- self.train_after_loop()
82
-
83
- # Training loop for each epoch
84
- def train_in_loop(self):
85
- try:
86
- self.prepare_for_steps()
87
- for self.step, self.batch_data in self.pbar:
88
- self.train_in_steps()
89
- self.print_details()
90
- except Exception as _:
91
- LOGGER.error('ERROR in training steps.')
92
- raise
93
- try:
94
- self.eval_and_save()
95
- except Exception as _:
96
- LOGGER.error('ERROR in evaluate and save model.')
97
- raise
98
-
99
- # Training loop for batchdata
100
- def train_in_steps(self):
101
- images, targets = self.prepro_data(self.batch_data, self.device)
102
- # forward
103
- with amp.autocast(enabled=self.device != 'cpu'):
104
- preds = self.model(images)
105
- total_loss, loss_items = self.compute_loss(preds, targets)
106
- if self.rank != -1:
107
- total_loss *= self.world_size
108
- # backward
109
- self.scaler.scale(total_loss).backward()
110
- self.loss_items = loss_items
111
- self.update_optimizer()
112
-
113
- def eval_and_save(self):
114
- remaining_epochs = self.max_epoch - self.epoch
115
- eval_interval = self.args.eval_interval if remaining_epochs > self.args.heavy_eval_range else 1
116
- is_val_epoch = (not self.args.eval_final_only or (remaining_epochs == 1)) and (self.epoch % eval_interval == 0)
117
- if self.main_process:
118
- self.ema.update_attr(self.model, include=['nc', 'names', 'stride']) # update attributes for ema model
119
- if is_val_epoch:
120
- self.eval_model()
121
- self.ap = self.evaluate_results[0] * 0.1 + self.evaluate_results[1] * 0.9
122
- self.best_ap = max(self.ap, self.best_ap)
123
- # save ckpt
124
- ckpt = {
125
- 'model': deepcopy(de_parallel(self.model)).half(),
126
- 'ema': deepcopy(self.ema.ema).half(),
127
- 'updates': self.ema.updates,
128
- 'optimizer': self.optimizer.state_dict(),
129
- 'epoch': self.epoch,
130
- }
131
-
132
- save_ckpt_dir = osp.join(self.save_dir, 'weights')
133
- save_checkpoint(ckpt, (is_val_epoch) and (self.ap == self.best_ap), save_ckpt_dir, model_name='last_ckpt')
134
- del ckpt
135
- # log for tensorboard
136
- write_tblog(self.tblogger, self.epoch, self.evaluate_results, self.mean_loss)
137
-
138
- def eval_model(self):
139
- results = eval.run(self.data_dict,
140
- batch_size=self.batch_size // self.world_size * 2,
141
- img_size=self.img_size,
142
- model=self.ema.ema,
143
- dataloader=self.val_loader,
144
- save_dir=self.save_dir,
145
- task='train')
146
-
147
- LOGGER.info(f"Epoch: {self.epoch} | mAP@0.5: {results[0]} | mAP@0.50:0.95: {results[1]}")
148
- self.evaluate_results = results[:2]
149
-
150
- def train_before_loop(self):
151
- LOGGER.info('Training start...')
152
- self.start_time = time.time()
153
- self.warmup_stepnum = max(round(self.cfg.solver.warmup_epochs * self.max_stepnum), 1000)
154
- self.scheduler.last_epoch = self.start_epoch - 1
155
- self.last_opt_step = -1
156
- self.scaler = amp.GradScaler(enabled=self.device != 'cpu')
157
-
158
- self.best_ap, self.ap = 0.0, 0.0
159
- self.evaluate_results = (0, 0) # AP50, AP50_95
160
- self.compute_loss = ComputeLoss(iou_type=self.cfg.model.head.iou_type)
161
-
162
- def prepare_for_steps(self):
163
- if self.epoch > self.start_epoch:
164
- self.scheduler.step()
165
- self.model.train()
166
- if self.rank != -1:
167
- self.train_loader.sampler.set_epoch(self.epoch)
168
- self.mean_loss = torch.zeros(4, device=self.device)
169
- self.optimizer.zero_grad()
170
-
171
- LOGGER.info(('\n' + '%10s' * 5) % ('Epoch', 'iou_loss', 'l1_loss', 'obj_loss', 'cls_loss'))
172
- self.pbar = enumerate(self.train_loader)
173
- if self.main_process:
174
- self.pbar = tqdm(self.pbar, total=self.max_stepnum, ncols=NCOLS, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')
175
-
176
- # Print loss after each steps
177
- def print_details(self):
178
- if self.main_process:
179
- self.mean_loss = (self.mean_loss * self.step + self.loss_items) / (self.step + 1)
180
- self.pbar.set_description(('%10s' + '%10.4g' * 4) % (f'{self.epoch}/{self.max_epoch - 1}', \
181
- *(self.mean_loss)))
182
-
183
- # Empty cache if training finished
184
- def train_after_loop(self):
185
- if self.main_process:
186
- LOGGER.info(f'\nTraining completed in {(time.time() - self.start_time) / 3600:.3f} hours.')
187
- save_ckpt_dir = osp.join(self.save_dir, 'weights')
188
- strip_optimizer(save_ckpt_dir, self.epoch) # strip optimizers for saved pt model
189
- if self.device != 'cpu':
190
- torch.cuda.empty_cache()
191
-
192
- def update_optimizer(self):
193
- curr_step = self.step + self.max_stepnum * self.epoch
194
- self.accumulate = max(1, round(64 / self.batch_size))
195
- if curr_step <= self.warmup_stepnum:
196
- self.accumulate = max(1, np.interp(curr_step, [0, self.warmup_stepnum], [1, 64 / self.batch_size]).round())
197
- for k, param in enumerate(self.optimizer.param_groups):
198
- warmup_bias_lr = self.cfg.solver.warmup_bias_lr if k == 2 else 0.0
199
- param['lr'] = np.interp(curr_step, [0, self.warmup_stepnum], [warmup_bias_lr, param['initial_lr'] * self.lf(self.epoch)])
200
- if 'momentum' in param:
201
- param['momentum'] = np.interp(curr_step, [0, self.warmup_stepnum], [self.cfg.solver.warmup_momentum, self.cfg.solver.momentum])
202
- if curr_step - self.last_opt_step >= self.accumulate:
203
- self.scaler.step(self.optimizer)
204
- self.scaler.update()
205
- self.optimizer.zero_grad()
206
- if self.ema:
207
- self.ema.update(self.model)
208
- self.last_opt_step = curr_step
209
-
210
- @staticmethod
211
- def get_data_loader(args, cfg, data_dict):
212
- train_path, val_path = data_dict['train'], data_dict['val']
213
- # check data
214
- nc = int(data_dict['nc'])
215
- class_names = data_dict['names']
216
- assert len(class_names) == nc, f'the length of class names does not match the number of classes defined'
217
- grid_size = max(int(max(cfg.model.head.strides)), 32)
218
- # create train dataloader
219
- train_loader = create_dataloader(train_path, args.img_size, args.batch_size // args.world_size, grid_size,
220
- hyp=dict(cfg.data_aug), augment=True, rect=False, rank=args.local_rank,
221
- workers=args.workers, shuffle=True, check_images=args.check_images,
222
- check_labels=args.check_labels, data_dict=data_dict, task='train')[0]
223
- # create val dataloader
224
- val_loader = None
225
- if args.rank in [-1, 0]:
226
- val_loader = create_dataloader(val_path, args.img_size, args.batch_size // args.world_size * 2, grid_size,
227
- hyp=dict(cfg.data_aug), rect=True, rank=-1, pad=0.5,
228
- workers=args.workers, check_images=args.check_images,
229
- check_labels=args.check_labels, data_dict=data_dict, task='val')[0]
230
-
231
- return train_loader, val_loader
232
-
233
- @staticmethod
234
- def prepro_data(batch_data, device):
235
- images = batch_data[0].to(device, non_blocking=True).float() / 255
236
- targets = batch_data[1].to(device)
237
- return images, targets
238
-
239
- def get_model(self, args, cfg, nc, device):
240
- model = build_model(cfg, nc, device)
241
- weights = cfg.model.pretrained
242
- if weights: # finetune if pretrained model is set
243
- LOGGER.info(f'Loading state_dict from {weights} for fine-tuning...')
244
- model = load_state_dict(weights, model, map_location=device)
245
- LOGGER.info('Model: {}'.format(model))
246
- return model
247
-
248
- @staticmethod
249
- def parallel_model(args, model, device):
250
- # If DP mode
251
- dp_mode = device.type != 'cpu' and args.rank == -1
252
- if dp_mode and torch.cuda.device_count() > 1:
253
- LOGGER.warning('WARNING: DP not recommended, use DDP instead.\n')
254
- model = torch.nn.DataParallel(model)
255
-
256
- # If DDP mode
257
- ddp_mode = device.type != 'cpu' and args.rank != -1
258
- if ddp_mode:
259
- model = DDP(model, device_ids=[args.local_rank], output_device=args.local_rank)
260
-
261
- return model
262
-
263
- def get_optimizer(self, args, cfg, model):
264
- accumulate = max(1, round(64 / args.batch_size))
265
- cfg.solver.weight_decay *= args.batch_size * accumulate / 64
266
- optimizer = build_optimizer(cfg, model)
267
- return optimizer
268
-
269
- @staticmethod
270
- def get_lr_scheduler(args, cfg, optimizer):
271
- epochs = args.epochs
272
- lr_scheduler, lf = build_lr_scheduler(cfg, optimizer, epochs)
273
- return lr_scheduler, lf
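
The deleted Trainer is normally driven by a thin launcher script; a hedged sketch of that flow, based on the constructor and train() interface shown above (the config helper import and the exact argument set are assumptions inferred from the attributes the class reads):

```python
# Sketch of driving the removed Trainer (constructor/train() interface as above).
# SimpleNamespace stands in for the argparse namespace a launcher would build;
# Config.fromfile is the assumed YOLOv6 config loader, not part of this diff.
from types import SimpleNamespace

import torch
from yolov6.core.engine import Trainer
from yolov6.utils.config import Config  # assumed helper

args = SimpleNamespace(
    data_path="data/coco.yaml", epochs=300, batch_size=32, img_size=640,
    rank=-1, local_rank=-1, world_size=1, save_dir="runs/train/exp",
    resume=False, workers=8, check_images=False, check_labels=False,
    eval_interval=20, heavy_eval_range=50, eval_final_only=False,
)
cfg = Config.fromfile("configs/yolov6s.py")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Trainer(args, cfg, device).train()  # train_before_loop -> per-epoch loop -> train_after_loop
```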
 
yolov6/core/evaler.py DELETED
@@ -1,256 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- from tqdm import tqdm
5
- import numpy as np
6
- import json
7
- import torch
8
- import yaml
9
- from pathlib import Path
10
-
11
- from pycocotools.coco import COCO
12
- from pycocotools.cocoeval import COCOeval
13
-
14
- from yolov6.data.data_load import create_dataloader
15
- from yolov6.utils.events import LOGGER, NCOLS
16
- from yolov6.utils.nms import non_max_suppression
17
- from yolov6.utils.checkpoint import load_checkpoint
18
- from yolov6.utils.torch_utils import time_sync, get_model_info
19
-
20
- '''
21
- python tools/eval.py --task 'train'/'val'/'speed'
22
- '''
23
-
24
-
25
- class Evaler:
26
- def __init__(self,
27
- data,
28
- batch_size=32,
29
- img_size=640,
30
- conf_thres=0.001,
31
- iou_thres=0.65,
32
- device='',
33
- half=True,
34
- save_dir=''):
35
- self.data = data
36
- self.batch_size = batch_size
37
- self.img_size = img_size
38
- self.conf_thres = conf_thres
39
- self.iou_thres = iou_thres
40
- self.device = device
41
- self.half = half
42
- self.save_dir = save_dir
43
-
44
- def init_model(self, model, weights, task):
45
- if task != 'train':
46
- model = load_checkpoint(weights, map_location=self.device)
47
- self.stride = int(model.stride.max())
48
- if self.device.type != 'cpu':
49
- model(torch.zeros(1, 3, self.img_size, self.img_size).to(self.device).type_as(next(model.parameters())))
50
- # switch to deploy
51
- from yolov6.layers.common import RepVGGBlock
52
- for layer in model.modules():
53
- if isinstance(layer, RepVGGBlock):
54
- layer.switch_to_deploy()
55
- LOGGER.info("Switch model to deploy modality.")
56
- LOGGER.info("Model Summary: {}".format(get_model_info(model, self.img_size)))
57
- model.half() if self.half else model.float()
58
- return model
59
-
60
- def init_data(self, dataloader, task):
61
- '''Initialize dataloader.
62
- Returns a dataloader for task val or speed.
63
- '''
64
- self.is_coco = self.data.get("is_coco", False)
65
- self.ids = self.coco80_to_coco91_class() if self.is_coco else list(range(1000))
66
- if task != 'train':
67
- pad = 0.0 if task == 'speed' else 0.5
68
- dataloader = create_dataloader(self.data[task if task in ('train', 'val', 'test') else 'val'],
69
- self.img_size, self.batch_size, self.stride, check_labels=True, pad=pad, rect=True,
70
- data_dict=self.data, task=task)[0]
71
- return dataloader
72
-
73
- def predict_model(self, model, dataloader, task):
74
- '''Model prediction
75
- Predicts the whole dataset and gets the prediced results and inference time.
76
- '''
77
- self.speed_result = torch.zeros(4, device=self.device)
78
- pred_results = []
79
- pbar = tqdm(dataloader, desc="Inferencing model in val datasets.", ncols=NCOLS)
80
- for imgs, targets, paths, shapes in pbar:
81
- # pre-process
82
- t1 = time_sync()
83
- imgs = imgs.to(self.device, non_blocking=True)
84
- imgs = imgs.half() if self.half else imgs.float()
85
- imgs /= 255
86
- self.speed_result[1] += time_sync() - t1 # pre-process time
87
-
88
- # Inference
89
- t2 = time_sync()
90
- outputs = model(imgs)
91
- self.speed_result[2] += time_sync() - t2 # inference time
92
-
93
- # post-process
94
- t3 = time_sync()
95
- outputs = non_max_suppression(outputs, self.conf_thres, self.iou_thres, multi_label=True)
96
- self.speed_result[3] += time_sync() - t3 # post-process time
97
- self.speed_result[0] += len(outputs)
98
-
99
- # save result
100
- pred_results.extend(self.convert_to_coco_format(outputs, imgs, paths, shapes, self.ids))
101
- return pred_results
102
-
103
- def eval_model(self, pred_results, model, dataloader, task):
104
- '''Evaluate models
105
- For task speed, this function only evaluates the speed of model and outputs inference time.
106
- For task val, this function evaluates the speed and mAP by pycocotools, and returns
107
- inference time and mAP value.
108
- '''
109
- LOGGER.info(f'\nEvaluating speed.')
110
- self.eval_speed(task)
111
-
112
- LOGGER.info(f'\nEvaluating mAP by pycocotools.')
113
- if task != 'speed' and len(pred_results):
114
- if 'anno_path' in self.data:
115
- anno_json = self.data['anno_path']
116
- else:
117
- # generated coco format labels in dataset initialization
118
- dataset_root = os.path.dirname(os.path.dirname(self.data['val']))
119
- base_name = os.path.basename(self.data['val'])
120
- anno_json = os.path.join(dataset_root, 'annotations', f'instances_{base_name}.json')
121
- pred_json = os.path.join(self.save_dir, "predictions.json")
122
- LOGGER.info(f'Saving {pred_json}...')
123
- with open(pred_json, 'w') as f:
124
- json.dump(pred_results, f)
125
-
126
- anno = COCO(anno_json)
127
- pred = anno.loadRes(pred_json)
128
- cocoEval = COCOeval(anno, pred, 'bbox')
129
- if self.is_coco:
130
- imgIds = [int(os.path.basename(x).split(".")[0])
131
- for x in dataloader.dataset.img_paths]
132
- cocoEval.params.imgIds = imgIds
133
- cocoEval.evaluate()
134
- cocoEval.accumulate()
135
- cocoEval.summarize()
136
- map, map50 = cocoEval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
137
- # Return results
138
- model.float() # for training
139
- if task != 'train':
140
- LOGGER.info(f"Results saved to {self.save_dir}")
141
- return (map50, map)
142
- return (0.0, 0.0)
143
-
144
- def eval_speed(self, task):
145
- '''Evaluate model inference speed.'''
146
- if task != 'train':
147
- n_samples = self.speed_result[0].item()
148
- pre_time, inf_time, nms_time = 1000 * self.speed_result[1:].cpu().numpy() / n_samples
149
- for n, v in zip(["pre-process", "inference", "NMS"],[pre_time, inf_time, nms_time]):
150
- LOGGER.info("Average {} time: {:.2f} ms".format(n, v))
151
-
152
- def box_convert(self, x):
153
- # Convert boxes with shape [n, 4] from [x1, y1, x2, y2] to [x, y, w, h] where x1y1=top-left, x2y2=bottom-right
154
- y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
155
- y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
156
- y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
157
- y[:, 2] = x[:, 2] - x[:, 0] # width
158
- y[:, 3] = x[:, 3] - x[:, 1] # height
159
- return y
160
-
161
- def scale_coords(self, img1_shape, coords, img0_shape, ratio_pad=None):
162
- # Rescale coords (xyxy) from img1_shape to img0_shape
163
- if ratio_pad is None: # calculate from img0_shape
164
- gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
165
- pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
166
- else:
167
- gain = ratio_pad[0][0]
168
- pad = ratio_pad[1]
169
-
170
- coords[:, [0, 2]] -= pad[0] # x padding
171
- coords[:, [1, 3]] -= pad[1] # y padding
172
- coords[:, :4] /= gain
173
- if isinstance(coords, torch.Tensor): # faster individually
174
- coords[:, 0].clamp_(0, img0_shape[1]) # x1
175
- coords[:, 1].clamp_(0, img0_shape[0]) # y1
176
- coords[:, 2].clamp_(0, img0_shape[1]) # x2
177
- coords[:, 3].clamp_(0, img0_shape[0]) # y2
178
- else: # np.array (faster grouped)
179
- coords[:, [0, 2]] = coords[:, [0, 2]].clip(0, img0_shape[1]) # x1, x2
180
- coords[:, [1, 3]] = coords[:, [1, 3]].clip(0, img0_shape[0]) # y1, y2
181
- return coords
182
-
183
- def convert_to_coco_format(self, outputs, imgs, paths, shapes, ids):
184
- pred_results = []
185
- for i, pred in enumerate(outputs):
186
- if len(pred) == 0:
187
- continue
188
- path, shape = Path(paths[i]), shapes[i][0]
189
- self.scale_coords(imgs[i].shape[1:], pred[:, :4], shape, shapes[i][1])
190
- image_id = int(path.stem) if path.stem.isnumeric() else path.stem
191
- bboxes = self.box_convert(pred[:, 0:4])
192
- bboxes[:, :2] -= bboxes[:, 2:] / 2
193
- cls = pred[:, 5]
194
- scores = pred[:, 4]
195
- for ind in range(pred.shape[0]):
196
- category_id = ids[int(cls[ind])]
197
- bbox = [round(x, 3) for x in bboxes[ind].tolist()]
198
- score = round(scores[ind].item(), 5)
199
- pred_data = {
200
- "image_id": image_id,
201
- "category_id": category_id,
202
- "bbox": bbox,
203
- "score": score
204
- }
205
- pred_results.append(pred_data)
206
- return pred_results
207
-
208
- @staticmethod
209
- def check_task(task):
210
- if task not in ['train','val','speed']:
211
- raise Exception("task argument error: only support 'train' / 'val' / 'speed' task.")
212
-
213
- @staticmethod
214
- def reload_thres(conf_thres, iou_thres, task):
215
- '''Sets conf and iou threshold for task val/speed'''
216
- if task != 'train':
217
- if task == 'val':
218
- conf_thres = 0.001
219
- if task == 'speed':
220
- conf_thres = 0.25
221
- iou_thres = 0.45
222
- return conf_thres, iou_thres
223
-
224
- @staticmethod
225
- def reload_device(device, model, task):
226
- # device = 'cpu' or '0' or '0,1,2,3'
227
- if task == 'train':
228
- device = next(model.parameters()).device
229
- else:
230
- if device == 'cpu':
231
- os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
232
- elif device:
233
- os.environ['CUDA_VISIBLE_DEVICES'] = device
234
- assert torch.cuda.is_available()
235
- cuda = device != 'cpu' and torch.cuda.is_available()
236
- device = torch.device('cuda:0' if cuda else 'cpu')
237
- return device
238
-
239
- @staticmethod
240
- def reload_dataset(data):
241
- with open(data, errors='ignore') as yaml_file:
242
- data = yaml.safe_load(yaml_file)
243
- val = data.get('val')
244
- if not os.path.exists(val):
245
- raise Exception('Dataset not found.')
246
- return data
247
-
248
- @staticmethod
249
- def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
250
- # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
251
- x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
252
- 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
253
- 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
254
- 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79,
255
- 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
256
- return x
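
The evaluation flow implemented by the deleted Evaler follows its method names directly; a hedged sketch for the 'val' task (the checkpoint, dataset yaml, and save directory are placeholders):

```python
# Sketch of the removed Evaler's evaluation flow for task='val'
# (method names and signatures as defined above; paths are placeholders).
from yolov6.core.evaler import Evaler

task = "val"
Evaler.check_task(task)
data = Evaler.reload_dataset("data/coco.yaml")               # parse dataset yaml, check val path
device = Evaler.reload_device("0", None, task)               # '0' -> cuda:0
conf_thres, iou_thres = Evaler.reload_thres(0.25, 0.45, task)

val = Evaler(data, batch_size=32, img_size=640, conf_thres=conf_thres,
             iou_thres=iou_thres, device=device, half=True, save_dir="runs/val/exp")
model = val.init_model(None, "weights/best_ckpt.pt", task)   # load checkpoint for non-train tasks
dataloader = val.init_data(None, task)
pred_results = val.predict_model(model, dataloader, task)    # COCO-format predictions + timings
map50, map50_95 = val.eval_model(pred_results, model, dataloader, task)
```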
 
yolov6/core/inferer.py DELETED
@@ -1,231 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- import os.path as osp
5
- import math
6
- from tqdm import tqdm
7
- import numpy as np
8
- import cv2
9
- import torch
10
- from PIL import ImageFont
11
-
12
- from yolov6.utils.events import LOGGER, load_yaml
13
- from yolov6.layers.common import DetectBackend
14
- from yolov6.data.data_augment import letterbox
15
- from yolov6.utils.nms import non_max_suppression
16
- from yolov6.utils.torch_utils import get_model_info
17
-
18
-
19
- class Inferer:
20
- def __init__(self, source, weights, device, yaml, img_size, half):
21
- import glob
22
- from yolov6.data.datasets import IMG_FORMATS
23
-
24
- self.__dict__.update(locals())
25
-
26
- # Init model
27
- self.device = device
28
- self.img_size = img_size
29
- cuda = self.device != 'cpu' and torch.cuda.is_available()
30
- self.device = torch.device('cuda:0' if cuda else 'cpu')
31
- self.model = DetectBackend(weights, device=self.device)
32
- self.stride = self.model.stride
33
- self.class_names = load_yaml(yaml)['names']
34
- self.img_size = self.check_img_size(self.img_size, s=self.stride) # check image size
35
-
36
- # Half precision
37
- if half & (self.device.type != 'cpu'):
38
- self.model.model.half()
39
- else:
40
- self.model.model.float()
41
- half = False
42
-
43
- if self.device.type != 'cpu':
44
- self.model(torch.zeros(1, 3, *self.img_size).to(self.device).type_as(next(self.model.model.parameters()))) # warmup
45
-
46
- # Load data
47
- if os.path.isdir(source):
48
- img_paths = sorted(glob.glob(os.path.join(source, '*.*'))) # dir
49
- elif os.path.isfile(source):
50
- img_paths = [source] # files
51
- else:
52
- raise Exception(f'Invalid path: {source}')
53
- self.img_paths = [img_path for img_path in img_paths if img_path.split('.')[-1].lower() in IMG_FORMATS]
54
-
55
- # Switch model to deploy status
56
- self.model_switch(self.model, self.img_size)
57
-
58
- def model_switch(self, model, img_size):
59
- ''' Model switch to deploy status '''
60
- from yolov6.layers.common import RepVGGBlock
61
- for layer in model.modules():
62
- if isinstance(layer, RepVGGBlock):
63
- layer.switch_to_deploy()
64
-
65
- LOGGER.info("Switch model to deploy modality.")
66
-
67
- def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, save_txt, save_img, hide_labels, hide_conf):
68
- ''' Model Inference and results visualization '''
69
-
70
- for img_path in tqdm(self.img_paths):
71
- img, img_src = self.precess_image(img_path, self.img_size, self.stride, self.half)
72
- img = img.to(self.device)
73
- if len(img.shape) == 3:
74
- img = img[None]
75
- # expand for batch dim
76
- pred_results = self.model(img)
77
- det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
78
-
79
- save_path = osp.join(save_dir, osp.basename(img_path)) # im.jpg
80
- txt_path = osp.join(save_dir, 'labels', osp.splitext(osp.basename(img_path))[0])
81
-
82
- gn = torch.tensor(img_src.shape)[[1, 0, 1, 0]] # normalization gain whwh
83
- img_ori = img_src
84
-
85
- # check image and font
86
- assert img_ori.data.contiguous, 'Image needs to be contiguous. Please apply to input images with np.ascontiguousarray(im).'
87
- self.font_check()
88
-
89
- if len(det):
90
- det[:, :4] = self.rescale(img.shape[2:], det[:, :4], img_src.shape).round()
91
-
92
- for *xyxy, conf, cls in reversed(det):
93
- if save_txt: # Write to file
94
- xywh = (self.box_convert(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
95
- line = (cls, *xywh, conf)
96
- with open(txt_path + '.txt', 'a') as f:
97
- f.write(('%g ' * len(line)).rstrip() % line + '\n')
98
-
99
- if save_img:
100
- class_num = int(cls) # integer class
101
- label = None if hide_labels else (self.class_names[class_num] if hide_conf else f'{self.class_names[class_num]} {conf:.2f}')
102
-
103
- self.plot_box_and_label(img_ori, max(round(sum(img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=self.generate_colors(class_num, True))
104
-
105
- img_src = np.asarray(img_ori)
106
-
107
- # Save results (image with detections)
108
- if save_img:
109
- cv2.imwrite(save_path, img_src)
110
-
111
- @staticmethod
112
- def precess_image(path, img_size, stride, half):
113
- '''Process image before image inference.'''
114
- try:
115
- img_src = cv2.imread(path)
116
- assert img_src is not None, f'Invalid image: {path}'
117
- except Exception as e:
118
- LOGGER.warning(e)
119
- image = letterbox(img_src, img_size, stride=stride)[0]
120
-
121
- # Convert
122
- image = image.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
123
- image = torch.from_numpy(np.ascontiguousarray(image))
124
- image = image.half() if half else image.float() # uint8 to fp16/32
125
- image /= 255 # 0 - 255 to 0.0 - 1.0
126
-
127
- return image, img_src
128
-
129
- @staticmethod
130
- def rescale(ori_shape, boxes, target_shape):
131
- '''Rescale the output to the original image shape'''
132
- ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
133
- padding = (ori_shape[1] - target_shape[1] * ratio) / 2, (ori_shape[0] - target_shape[0] * ratio) / 2
134
-
135
- boxes[:, [0, 2]] -= padding[0]
136
- boxes[:, [1, 3]] -= padding[1]
137
- boxes[:, :4] /= ratio
138
-
139
- boxes[:, 0].clamp_(0, target_shape[1]) # x1
140
- boxes[:, 1].clamp_(0, target_shape[0]) # y1
141
- boxes[:, 2].clamp_(0, target_shape[1]) # x2
142
- boxes[:, 3].clamp_(0, target_shape[0]) # y2
143
-
144
- return boxes
145
-
146
- def check_img_size(self, img_size, s=32, floor=0):
147
- """Make sure image size is a multiple of stride s in each dimension, and return a new shape list of image."""
148
- if isinstance(img_size, int): # integer i.e. img_size=640
149
- new_size = max(self.make_divisible(img_size, int(s)), floor)
150
- elif isinstance(img_size, list): # list i.e. img_size=[640, 480]
151
- new_size = [max(self.make_divisible(x, int(s)), floor) for x in img_size]
152
- else:
153
- raise Exception(f"Unsupported type of img_size: {type(img_size)}")
154
-
155
- if new_size != img_size:
156
- print(f'WARNING: --img-size {img_size} must be multiple of max stride {s}, updating to {new_size}')
157
- return new_size if isinstance(img_size,list) else [new_size]*2
158
-
159
- def make_divisible(self, x, divisor):
160
- # Upward revision the value x to make it evenly divisible by the divisor.
161
- return math.ceil(x / divisor) * divisor
162
-
163
- @staticmethod
164
- def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
165
- # Add one xyxy box to image with label
166
- p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
167
- cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
168
- if label:
169
- tf = max(lw - 1, 1) # font thickness
170
- w, h = cv2.getTextSize(label, 0, fontScale=lw / 3, thickness=tf)[0] # text width, height
171
- outside = p1[1] - h - 3 >= 0 # label fits outside box
172
- p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
173
- cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA) # filled
174
- cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), 0, lw / 3, txt_color,
175
- thickness=tf, lineType=cv2.LINE_AA)
176
-
177
- @staticmethod
178
- def font_check(font='./yolov6/utils/Arial.ttf', size=10):
179
- # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary
180
- assert osp.exists(font), f'font path not exists: {font}'
181
- try:
182
- return ImageFont.truetype(str(font) if font.exists() else font.name, size)
183
- except Exception as e: # download if missing
184
- return ImageFont.truetype(str(font), size)
185
-
186
- @staticmethod
187
- def box_convert(x):
188
- # Convert boxes with shape [n, 4] from [x1, y1, x2, y2] to [x, y, w, h] where x1y1=top-left, x2y2=bottom-right
189
- y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
190
- y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
191
- y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
192
- y[:, 2] = x[:, 2] - x[:, 0] # width
193
- y[:, 3] = x[:, 3] - x[:, 1] # height
194
- return y
195
-
196
- @staticmethod
197
- def generate_colors(i, bgr=False):
198
- hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
199
- '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
200
- palette = []
201
- for iter in hex:
202
- h = '#' + iter
203
- palette.append(tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)))
204
- num = len(palette)
205
- color = palette[int(i) % num]
206
- return (color[2], color[1], color[0]) if bgr else color
207
-
208
-
209
- class VideoInferer(Inferer):
210
-
211
- def setup_source(self, source):
212
- # Load data
213
- if os.path.isfile(source):
214
- self.vid_path = source
215
- self.vid_name = '.'.join(os.path.basename(source).split('.')[:-1])
216
- else:
217
- raise Exception(f'Invalid path: {source}')
218
-
219
- self.cap = cv2.VideoCapture(self.vid_path)
220
-
221
- def iterator_length(self):
222
- return int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
223
-
224
- def img_iterator(self):
225
- cur_fid = 0
226
- ret, frame = self.cap.read()
227
-
228
- while ret:
229
- yield frame, f'{self.vid_name}_frame_{cur_fid:06}.jpg'
230
- ret, frame = self.cap.read()
231
- cur_fid += 1
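
For the VideoInferer subclass deleted above, frame iteration is decoupled from detection; a hedged sketch of that API (the weights, yaml, and video paths are placeholders):

```python
# Sketch of the removed VideoInferer's frame-iteration API (methods as above);
# detection on each frame would reuse the parent Inferer logic.
from yolov6.core.inferer import VideoInferer

vid = VideoInferer(source="video.mp4", weights="weights/best_ckpt.pt",
                   device="0", yaml="data/coco.yaml", img_size=640, half=False)
vid.setup_source("video.mp4")             # opens the cv2.VideoCapture
total = vid.iterator_length()             # frame count via CAP_PROP_FRAME_COUNT

for frame, name in vid.img_iterator():    # yields (BGR frame, "<video>_frame_000000.jpg")
    pass                                  # run per-frame inference / saving here
```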
 
yolov6/data/data_augment.py DELETED
@@ -1,193 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- # This code is based on
4
- # https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py
5
-
6
- import math
7
- import random
8
-
9
- import cv2
10
- import numpy as np
11
-
12
-
13
- def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
14
- # HSV color-space augmentation
15
- if hgain or sgain or vgain:
16
- r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
17
- hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
18
- dtype = im.dtype # uint8
19
-
20
- x = np.arange(0, 256, dtype=r.dtype)
21
- lut_hue = ((x * r[0]) % 180).astype(dtype)
22
- lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
23
- lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
24
-
25
- im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
26
- cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed
27
-
28
-
29
- def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
30
- # Resize and pad image while meeting stride-multiple constraints
31
- shape = im.shape[:2] # current shape [height, width]
32
- if isinstance(new_shape, int):
33
- new_shape = (new_shape, new_shape)
34
-
35
- # Scale ratio (new / old)
36
- r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
37
- if not scaleup: # only scale down, do not scale up (for better val mAP)
38
- r = min(r, 1.0)
39
-
40
- # Compute padding
41
- new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
42
- dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
43
-
44
- if auto: # minimum rectangle
45
- dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
46
-
47
- dw /= 2 # divide padding into 2 sides
48
- dh /= 2
49
-
50
- if shape[::-1] != new_unpad: # resize
51
- im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
52
- top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
53
- left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
54
- im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
55
- return im, r, (dw, dh)
56
-
57
-
58
- def mixup(im, labels, im2, labels2):
59
- # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
60
- r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
61
- im = (im * r + im2 * (1 - r)).astype(np.uint8)
62
- labels = np.concatenate((labels, labels2), 0)
63
- return im, labels
64
-
65
-
66
- def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
67
- # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
68
- w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
69
- w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
70
- ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
71
- return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates
72
-
73
-
74
- def random_affine(img, labels=(), degrees=10, translate=.1, scale=.1, shear=10,
75
- new_shape=(640, 640)):
76
-
77
- n = len(labels)
78
- height, width = new_shape
79
-
80
- M, s = get_transform_matrix(img.shape[:2], (height, width), degrees, scale, shear, translate)
81
- if (M != np.eye(3)).any(): # image changed
82
- img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
83
-
84
- # Transform label coordinates
85
- if n:
86
- new = np.zeros((n, 4))
87
-
88
- xy = np.ones((n * 4, 3))
89
- xy[:, :2] = labels[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
90
- xy = xy @ M.T # transform
91
- xy = xy[:, :2].reshape(n, 8) # perspective rescale or affine
92
-
93
- # create new boxes
94
- x = xy[:, [0, 2, 4, 6]]
95
- y = xy[:, [1, 3, 5, 7]]
96
- new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
97
-
98
- # clip
99
- new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
100
- new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
101
-
102
- # filter candidates
103
- i = box_candidates(box1=labels[:, 1:5].T * s, box2=new.T, area_thr=0.1)
104
- labels = labels[i]
105
- labels[:, 1:5] = new[i]
106
-
107
- return img, labels
108
-
109
-
110
- def get_transform_matrix(img_shape, new_shape, degrees, scale, shear, translate):
111
- new_height, new_width = new_shape
112
- # Center
113
- C = np.eye(3)
114
- C[0, 2] = -img_shape[1] / 2 # x translation (pixels)
115
- C[1, 2] = -img_shape[0] / 2 # y translation (pixels)
116
-
117
- # Rotation and Scale
118
- R = np.eye(3)
119
- a = random.uniform(-degrees, degrees)
120
- # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
121
- s = random.uniform(1 - scale, 1 + scale)
122
- # s = 2 ** random.uniform(-scale, scale)
123
- R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
124
-
125
- # Shear
126
- S = np.eye(3)
127
- S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
128
- S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
129
-
130
- # Translation
131
- T = np.eye(3)
132
- T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * new_width # x translation (pixels)
133
-     T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * new_height  # y translation (pixels)
134
-
135
- # Combined rotation matrix
136
- M = T @ S @ R @ C # order of operations (right to left) is IMPORTANT
137
- return M, s
138
-
139
-
140
- def mosaic_augmentation(img_size, imgs, hs, ws, labels, hyp):
141
-
142
-     assert len(imgs) == 4, "The current version of mosaic augmentation only supports 4 images."
143
-
144
- labels4 = []
145
- s = img_size
146
- yc, xc = (int(random.uniform(s//2, 3*s//2)) for _ in range(2)) # mosaic center x, y
147
- for i in range(len(imgs)):
148
- # Load image
149
- img, h, w = imgs[i], hs[i], ws[i]
150
- # place img in img4
151
- if i == 0: # top left
152
- img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
153
- x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
154
- x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
155
- elif i == 1: # top right
156
- x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
157
- x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
158
- elif i == 2: # bottom left
159
- x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
160
- x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
161
- elif i == 3: # bottom right
162
- x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
163
- x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
164
-
165
- img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
166
- padw = x1a - x1b
167
- padh = y1a - y1b
168
-
169
- # Labels
170
- labels_per_img = labels[i].copy()
171
- if labels_per_img.size:
172
- boxes = np.copy(labels_per_img[:, 1:])
173
- boxes[:, 0] = w * (labels_per_img[:, 1] - labels_per_img[:, 3] / 2) + padw # top left x
174
- boxes[:, 1] = h * (labels_per_img[:, 2] - labels_per_img[:, 4] / 2) + padh # top left y
175
- boxes[:, 2] = w * (labels_per_img[:, 1] + labels_per_img[:, 3] / 2) + padw # bottom right x
176
- boxes[:, 3] = h * (labels_per_img[:, 2] + labels_per_img[:, 4] / 2) + padh # bottom right y
177
- labels_per_img[:, 1:] = boxes
178
-
179
- labels4.append(labels_per_img)
180
-
181
- # Concat/clip labels
182
- labels4 = np.concatenate(labels4, 0)
183
- for x in (labels4[:, 1:]):
184
- np.clip(x, 0, 2 * s, out=x)
185
-
186
- # Augment
187
- img4, labels4 = random_affine(img4, labels4,
188
- degrees=hyp['degrees'],
189
- translate=hyp['translate'],
190
- scale=hyp['scale'],
191
- shear=hyp['shear'])
192
-
193
- return img4, labels4
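
For orientation only, here is a small self-contained sketch (not part of the commit) of the resize-and-pad arithmetic that the deleted letterbox helper performs; the 720x1280 source size, 640 target and stride 32 are made-up values:

    import numpy as np

    h, w = 720, 1280                                   # hypothetical source frame
    new_shape, stride = (640, 640), 32
    r = min(new_shape[0] / h, new_shape[1] / w)        # scale ratio -> 0.5
    new_unpad = int(round(w * r)), int(round(h * r))   # resized (w, h) -> (640, 360)
    dw = new_shape[1] - new_unpad[0]                   # width padding -> 0
    dh = new_shape[0] - new_unpad[1]                   # height padding -> 280
    dw, dh = np.mod(dw, stride), np.mod(dh, stride)    # auto=True keeps only the stride remainder -> 0, 24
    print(r, new_unpad, (dw / 2, dh / 2))              # padding is split over both sides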
yolov6/data/data_load.py DELETED
@@ -1,113 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- # This code is based on
4
- # https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py
5
-
6
- import os
7
- from torch.utils.data import dataloader, distributed
8
-
9
- from .datasets import TrainValDataset
10
- from yolov6.utils.events import LOGGER
11
- from yolov6.utils.torch_utils import torch_distributed_zero_first
12
-
13
-
14
- def create_dataloader(
15
- path,
16
- img_size,
17
- batch_size,
18
- stride,
19
- hyp=None,
20
- augment=False,
21
- check_images=False,
22
- check_labels=False,
23
- pad=0.0,
24
- rect=False,
25
- rank=-1,
26
- workers=8,
27
- shuffle=False,
28
- data_dict=None,
29
- task="Train",
30
- ):
31
- """Create general dataloader.
32
-
33
- Returns dataloader and dataset
34
- """
35
- if rect and shuffle:
36
- LOGGER.warning(
37
- "WARNING: --rect is incompatible with DataLoader shuffle, setting shuffle=False"
38
- )
39
- shuffle = False
40
- with torch_distributed_zero_first(rank):
41
- dataset = TrainValDataset(
42
- path,
43
- img_size,
44
- batch_size,
45
- augment=augment,
46
- hyp=hyp,
47
- rect=rect,
48
- check_images=check_images,
49
- check_labels=check_labels,
50
- stride=int(stride),
51
- pad=pad,
52
- rank=rank,
53
- data_dict=data_dict,
54
- task=task,
55
- )
56
-
57
- batch_size = min(batch_size, len(dataset))
58
- workers = min(
59
- [
60
- os.cpu_count() // int(os.getenv("WORLD_SIZE", 1)),
61
- batch_size if batch_size > 1 else 0,
62
- workers,
63
- ]
64
- ) # number of workers
65
- sampler = (
66
- None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
67
- )
68
- return (
69
- TrainValDataLoader(
70
- dataset,
71
- batch_size=batch_size,
72
- shuffle=shuffle and sampler is None,
73
- num_workers=workers,
74
- sampler=sampler,
75
- pin_memory=True,
76
- collate_fn=TrainValDataset.collate_fn,
77
- ),
78
- dataset,
79
- )
80
-
81
-
82
- class TrainValDataLoader(dataloader.DataLoader):
83
- """Dataloader that reuses workers
84
-
85
- Uses same syntax as vanilla DataLoader
86
- """
87
-
88
- def __init__(self, *args, **kwargs):
89
- super().__init__(*args, **kwargs)
90
- object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
91
- self.iterator = super().__iter__()
92
-
93
- def __len__(self):
94
- return len(self.batch_sampler.sampler)
95
-
96
- def __iter__(self):
97
- for i in range(len(self)):
98
- yield next(self.iterator)
99
-
100
-
101
- class _RepeatSampler:
102
- """Sampler that repeats forever
103
-
104
- Args:
105
- sampler (Sampler)
106
- """
107
-
108
- def __init__(self, sampler):
109
- self.sampler = sampler
110
-
111
- def __iter__(self):
112
- while True:
113
- yield from iter(self.sampler)
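
The TrainValDataLoader above keeps its worker processes alive by swapping in a sampler that never terminates. A minimal self-contained sketch of that idea (the RepeatSampler name and the tiny dataset are just for illustration):

    import itertools

    import torch
    from torch.utils.data import SequentialSampler, TensorDataset

    class RepeatSampler:
        """Same idea as _RepeatSampler above: iterate the wrapped sampler forever."""
        def __init__(self, sampler):
            self.sampler = sampler

        def __iter__(self):
            while True:
                yield from iter(self.sampler)

    dataset = TensorDataset(torch.arange(4))
    indices = itertools.islice(iter(RepeatSampler(SequentialSampler(dataset))), 10)
    print(list(indices))   # [0, 1, 2, 3, 0, 1, 2, 3, 0, 1]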
yolov6/data/datasets.py DELETED
@@ -1,550 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
-
4
- import glob
5
- import os
6
- import os.path as osp
7
- import random
8
- import json
9
- import time
10
- import hashlib
11
-
12
- from multiprocessing.pool import Pool
13
-
14
- import cv2
15
- import numpy as np
16
- import torch
17
- from PIL import ExifTags, Image, ImageOps
18
- from torch.utils.data import Dataset
19
- from tqdm import tqdm
20
-
21
- from .data_augment import (
22
- augment_hsv,
23
- letterbox,
24
- mixup,
25
- random_affine,
26
- mosaic_augmentation,
27
- )
28
- from yolov6.utils.events import LOGGER
29
-
30
- # Parameters
31
- IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo"]
32
- # Get orientation exif tag
33
- for k, v in ExifTags.TAGS.items():
34
- if v == "Orientation":
35
- ORIENTATION = k
36
- break
37
-
38
-
39
- class TrainValDataset(Dataset):
40
- # YOLOv6 train_loader/val_loader, loads images and labels for training and validation
41
- def __init__(
42
- self,
43
- img_dir,
44
- img_size=640,
45
- batch_size=16,
46
- augment=False,
47
- hyp=None,
48
- rect=False,
49
- check_images=False,
50
- check_labels=False,
51
- stride=32,
52
- pad=0.0,
53
- rank=-1,
54
- data_dict=None,
55
- task="train",
56
- ):
57
- assert task.lower() in ("train", "val", "speed"), f"Not supported task: {task}"
58
- t1 = time.time()
59
- self.__dict__.update(locals())
60
- self.main_process = self.rank in (-1, 0)
61
- self.task = self.task.capitalize()
62
- self.class_names = data_dict["names"]
63
- self.img_paths, self.labels = self.get_imgs_labels(self.img_dir)
64
- if self.rect:
65
- shapes = [self.img_info[p]["shape"] for p in self.img_paths]
66
- self.shapes = np.array(shapes, dtype=np.float64)
67
- self.batch_indices = np.floor(
68
- np.arange(len(shapes)) / self.batch_size
69
- ).astype(
70
- np.int
71
- ) # batch indices of each image
72
- self.sort_files_shapes()
73
- t2 = time.time()
74
- if self.main_process:
75
-             LOGGER.info("%.1fs for dataset initialization." % (t2 - t1))
76
-
77
- def __len__(self):
78
- """Get the length of dataset"""
79
- return len(self.img_paths)
80
-
81
- def __getitem__(self, index):
82
-         """Fetch a data sample for the given index.
83
- This function applies mosaic and mixup augments during training.
84
- During validation, letterbox augment is applied.
85
- """
86
- # Mosaic Augmentation
87
- if self.augment and random.random() < self.hyp["mosaic"]:
88
- img, labels = self.get_mosaic(index)
89
- shapes = None
90
-
91
- # MixUp augmentation
92
- if random.random() < self.hyp["mixup"]:
93
- img_other, labels_other = self.get_mosaic(
94
- random.randint(0, len(self.img_paths) - 1)
95
- )
96
- img, labels = mixup(img, labels, img_other, labels_other)
97
-
98
- else:
99
- # Load image
100
- img, (h0, w0), (h, w) = self.load_image(index)
101
-
102
- # Letterbox
103
- shape = (
104
- self.batch_shapes[self.batch_indices[index]]
105
- if self.rect
106
- else self.img_size
107
- ) # final letterboxed shape
108
- img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
109
- shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
110
-
111
- labels = self.labels[index].copy()
112
- if labels.size:
113
- w *= ratio
114
- h *= ratio
115
- # new boxes
116
- boxes = np.copy(labels[:, 1:])
117
- boxes[:, 0] = (
118
- w * (labels[:, 1] - labels[:, 3] / 2) + pad[0]
119
- ) # top left x
120
- boxes[:, 1] = (
121
- h * (labels[:, 2] - labels[:, 4] / 2) + pad[1]
122
- ) # top left y
123
- boxes[:, 2] = (
124
- w * (labels[:, 1] + labels[:, 3] / 2) + pad[0]
125
- ) # bottom right x
126
- boxes[:, 3] = (
127
- h * (labels[:, 2] + labels[:, 4] / 2) + pad[1]
128
- ) # bottom right y
129
- labels[:, 1:] = boxes
130
-
131
- if self.augment:
132
- img, labels = random_affine(
133
- img,
134
- labels,
135
- degrees=self.hyp["degrees"],
136
- translate=self.hyp["translate"],
137
- scale=self.hyp["scale"],
138
- shear=self.hyp["shear"],
139
- new_shape=(self.img_size, self.img_size),
140
- )
141
-
142
- if len(labels):
143
- h, w = img.shape[:2]
144
-
145
- labels[:, [1, 3]] = labels[:, [1, 3]].clip(0, w - 1e-3) # x1, x2
146
- labels[:, [2, 4]] = labels[:, [2, 4]].clip(0, h - 1e-3) # y1, y2
147
-
148
- boxes = np.copy(labels[:, 1:])
149
- boxes[:, 0] = ((labels[:, 1] + labels[:, 3]) / 2) / w # x center
150
- boxes[:, 1] = ((labels[:, 2] + labels[:, 4]) / 2) / h # y center
151
- boxes[:, 2] = (labels[:, 3] - labels[:, 1]) / w # width
152
- boxes[:, 3] = (labels[:, 4] - labels[:, 2]) / h # height
153
- labels[:, 1:] = boxes
154
-
155
- if self.augment:
156
- img, labels = self.general_augment(img, labels)
157
-
158
- labels_out = torch.zeros((len(labels), 6))
159
- if len(labels):
160
- labels_out[:, 1:] = torch.from_numpy(labels)
161
-
162
- # Convert
163
- img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
164
- img = np.ascontiguousarray(img)
165
-
166
- return torch.from_numpy(img), labels_out, self.img_paths[index], shapes
167
-
168
- def load_image(self, index):
169
- """Load image.
170
-         This function loads an image with cv2 and resizes it to the target shape (img_size) while keeping the aspect ratio.
171
-
172
- Returns:
173
- Image, original shape of image, resized image shape
174
- """
175
- path = self.img_paths[index]
176
- im = cv2.imread(path)
177
- assert im is not None, f"Image Not Found {path}, workdir: {os.getcwd()}"
178
-
179
- h0, w0 = im.shape[:2] # origin shape
180
- r = self.img_size / max(h0, w0)
181
- if r != 1:
182
- im = cv2.resize(
183
- im,
184
- (int(w0 * r), int(h0 * r)),
185
- interpolation=cv2.INTER_AREA
186
- if r < 1 and not self.augment
187
- else cv2.INTER_LINEAR,
188
- )
189
- return im, (h0, w0), im.shape[:2]
190
-
191
- @staticmethod
192
- def collate_fn(batch):
193
- """Merges a list of samples to form a mini-batch of Tensor(s)"""
194
- img, label, path, shapes = zip(*batch)
195
- for i, l in enumerate(label):
196
- l[:, 0] = i # add target image index for build_targets()
197
- return torch.stack(img, 0), torch.cat(label, 0), path, shapes
198
-
199
- def get_imgs_labels(self, img_dir):
200
-
201
- assert osp.exists(img_dir), f"{img_dir} is an invalid directory path!"
202
- valid_img_record = osp.join(
203
- osp.dirname(img_dir), "." + osp.basename(img_dir) + ".json"
204
- )
205
- NUM_THREADS = min(8, os.cpu_count())
206
-
207
- img_paths = glob.glob(osp.join(img_dir, "*"), recursive=True)
208
- img_paths = sorted(
209
- p for p in img_paths if p.split(".")[-1].lower() in IMG_FORMATS
210
- )
211
- assert img_paths, f"No images found in {img_dir}."
212
-
213
- img_hash = self.get_hash(img_paths)
214
- if osp.exists(valid_img_record):
215
- with open(valid_img_record, "r") as f:
216
- cache_info = json.load(f)
217
- if "image_hash" in cache_info and cache_info["image_hash"] == img_hash:
218
- img_info = cache_info["information"]
219
- else:
220
- self.check_images = True
221
- else:
222
- self.check_images = True
223
-
224
- # check images
225
- if self.check_images and self.main_process:
226
- img_info = {}
227
- nc, msgs = 0, [] # number corrupt, messages
228
- LOGGER.info(
229
- f"{self.task}: Checking formats of images with {NUM_THREADS} process(es): "
230
- )
231
- with Pool(NUM_THREADS) as pool:
232
- pbar = tqdm(
233
- pool.imap(TrainValDataset.check_image, img_paths),
234
- total=len(img_paths),
235
- )
236
- for img_path, shape_per_img, nc_per_img, msg in pbar:
237
- if nc_per_img == 0: # not corrupted
238
- img_info[img_path] = {"shape": shape_per_img}
239
- nc += nc_per_img
240
- if msg:
241
- msgs.append(msg)
242
- pbar.desc = f"{nc} image(s) corrupted"
243
- pbar.close()
244
- if msgs:
245
- LOGGER.info("\n".join(msgs))
246
-
247
- cache_info = {"information": img_info, "image_hash": img_hash}
248
- # save valid image paths.
249
- with open(valid_img_record, "w") as f:
250
- json.dump(cache_info, f)
251
-
252
- # check and load anns
253
- label_dir = osp.join(
254
- osp.dirname(osp.dirname(img_dir)), "labels", osp.basename(img_dir)
255
- )
256
- assert osp.exists(label_dir), f"{label_dir} is an invalid directory path!"
257
-
258
- img_paths = list(img_info.keys())
259
- label_paths = sorted(
260
- osp.join(label_dir, osp.splitext(osp.basename(p))[0] + ".txt")
261
- for p in img_paths
262
- )
263
- label_hash = self.get_hash(label_paths)
264
- if "label_hash" not in cache_info or cache_info["label_hash"] != label_hash:
265
- self.check_labels = True
266
-
267
- if self.check_labels:
268
- cache_info["label_hash"] = label_hash
269
- nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number corrupt, messages
270
- LOGGER.info(
271
- f"{self.task}: Checking formats of labels with {NUM_THREADS} process(es): "
272
- )
273
- with Pool(NUM_THREADS) as pool:
274
- pbar = pool.imap(
275
- TrainValDataset.check_label_files, zip(img_paths, label_paths)
276
- )
277
- pbar = tqdm(pbar, total=len(label_paths)) if self.main_process else pbar
278
- for (
279
- img_path,
280
- labels_per_file,
281
- nc_per_file,
282
- nm_per_file,
283
- nf_per_file,
284
- ne_per_file,
285
- msg,
286
- ) in pbar:
287
- if nc_per_file == 0:
288
- img_info[img_path]["labels"] = labels_per_file
289
- else:
290
- img_info.pop(img_path)
291
- nc += nc_per_file
292
- nm += nm_per_file
293
- nf += nf_per_file
294
- ne += ne_per_file
295
- if msg:
296
- msgs.append(msg)
297
- if self.main_process:
298
- pbar.desc = f"{nf} label(s) found, {nm} label(s) missing, {ne} label(s) empty, {nc} invalid label files"
299
- if self.main_process:
300
- pbar.close()
301
- with open(valid_img_record, "w") as f:
302
- json.dump(cache_info, f)
303
- if msgs:
304
- LOGGER.info("\n".join(msgs))
305
- if nf == 0:
306
- LOGGER.warning(
307
- f"WARNING: No labels found in {osp.dirname(self.img_paths[0])}. "
308
- )
309
-
310
- if self.task.lower() == "val":
311
- if self.data_dict.get("is_coco", False): # use original json file when evaluating on coco dataset.
312
-             assert osp.exists(self.data_dict["anno_path"]), "Evaluating on the COCO dataset requires a valid annotation file path in the config file: data/coco.yaml"
313
- else:
314
- assert (
315
- self.class_names
316
-             ), "Class names are required when converting labels to COCO format for evaluation."
317
- save_dir = osp.join(osp.dirname(osp.dirname(img_dir)), "annotations")
318
- if not osp.exists(save_dir):
319
- os.mkdir(save_dir)
320
- save_path = osp.join(
321
- save_dir, "instances_" + osp.basename(img_dir) + ".json"
322
- )
323
- TrainValDataset.generate_coco_format_labels(
324
- img_info, self.class_names, save_path
325
- )
326
-
327
- img_paths, labels = list(
328
- zip(
329
- *[
330
- (
331
- img_path,
332
- np.array(info["labels"], dtype=np.float32)
333
- if info["labels"]
334
- else np.zeros((0, 5), dtype=np.float32),
335
- )
336
- for img_path, info in img_info.items()
337
- ]
338
- )
339
- )
340
- self.img_info = img_info
341
- LOGGER.info(
342
- f"{self.task}: Final numbers of valid images: {len(img_paths)}/ labels: {len(labels)}. "
343
- )
344
- return img_paths, labels
345
-
346
- def get_mosaic(self, index):
347
- """Gets images and labels after mosaic augments"""
348
- indices = [index] + random.choices(
349
- range(0, len(self.img_paths)), k=3
350
- ) # 3 additional image indices
351
- random.shuffle(indices)
352
- imgs, hs, ws, labels = [], [], [], []
353
- for index in indices:
354
- img, _, (h, w) = self.load_image(index)
355
- labels_per_img = self.labels[index]
356
- imgs.append(img)
357
- hs.append(h)
358
- ws.append(w)
359
- labels.append(labels_per_img)
360
- img, labels = mosaic_augmentation(self.img_size, imgs, hs, ws, labels, self.hyp)
361
- return img, labels
362
-
363
- def general_augment(self, img, labels):
364
- """Gets images and labels after general augment
365
-         This function applies HSV, random up-down flip, and random left-right flip augmentations.
366
- """
367
- nl = len(labels)
368
-
369
- # HSV color-space
370
- augment_hsv(
371
- img,
372
- hgain=self.hyp["hsv_h"],
373
- sgain=self.hyp["hsv_s"],
374
- vgain=self.hyp["hsv_v"],
375
- )
376
-
377
- # Flip up-down
378
- if random.random() < self.hyp["flipud"]:
379
- img = np.flipud(img)
380
- if nl:
381
- labels[:, 2] = 1 - labels[:, 2]
382
-
383
- # Flip left-right
384
- if random.random() < self.hyp["fliplr"]:
385
- img = np.fliplr(img)
386
- if nl:
387
- labels[:, 1] = 1 - labels[:, 1]
388
-
389
- return img, labels
390
-
391
- def sort_files_shapes(self):
392
- # Sort by aspect ratio
393
- batch_num = self.batch_indices[-1] + 1
394
- s = self.shapes # wh
395
- ar = s[:, 1] / s[:, 0] # aspect ratio
396
- irect = ar.argsort()
397
- self.img_paths = [self.img_paths[i] for i in irect]
398
- self.labels = [self.labels[i] for i in irect]
399
- self.shapes = s[irect] # wh
400
- ar = ar[irect]
401
-
402
- # Set training image shapes
403
- shapes = [[1, 1]] * batch_num
404
- for i in range(batch_num):
405
- ari = ar[self.batch_indices == i]
406
- mini, maxi = ari.min(), ari.max()
407
- if maxi < 1:
408
- shapes[i] = [maxi, 1]
409
- elif mini > 1:
410
- shapes[i] = [1, 1 / mini]
411
- self.batch_shapes = (
412
- np.ceil(np.array(shapes) * self.img_size / self.stride + self.pad).astype(
413
- np.int
414
- )
415
- * self.stride
416
- )
417
-
418
- @staticmethod
419
- def check_image(im_file):
420
- # verify an image.
421
- nc, msg = 0, ""
422
- try:
423
- im = Image.open(im_file)
424
- im.verify() # PIL verify
425
- shape = im.size # (width, height)
426
- im_exif = im._getexif()
427
- if im_exif and ORIENTATION in im_exif:
428
- rotation = im_exif[ORIENTATION]
429
- if rotation in (6, 8):
430
- shape = (shape[1], shape[0])
431
-
432
- assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
433
- assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
434
- if im.format.lower() in ("jpg", "jpeg"):
435
- with open(im_file, "rb") as f:
436
- f.seek(-2, 2)
437
- if f.read() != b"\xff\xd9": # corrupt JPEG
438
- ImageOps.exif_transpose(Image.open(im_file)).save(
439
- im_file, "JPEG", subsampling=0, quality=100
440
- )
441
- msg += f"WARNING: {im_file}: corrupt JPEG restored and saved"
442
- return im_file, shape, nc, msg
443
- except Exception as e:
444
- nc = 1
445
- msg = f"WARNING: {im_file}: ignoring corrupt image: {e}"
446
- return im_file, None, nc, msg
447
-
448
- @staticmethod
449
- def check_label_files(args):
450
- img_path, lb_path = args
451
-         nm, nf, ne, nc, msg = 0, 0, 0, 0, ""  # number (missing, found, empty, corrupt), message
452
- try:
453
- if osp.exists(lb_path):
454
- nf = 1 # label found
455
- with open(lb_path, "r") as f:
456
- labels = [
457
- x.split() for x in f.read().strip().splitlines() if len(x)
458
- ]
459
- labels = np.array(labels, dtype=np.float32)
460
- if len(labels):
461
- assert all(
462
- len(l) == 5 for l in labels
463
- ), f"{lb_path}: wrong label format."
464
- assert (
465
- labels >= 0
466
-                 ).all(), f"{lb_path}: Label values error: all values in label file must be >= 0"
467
- assert (
468
- labels[:, 1:] <= 1
469
- ).all(), f"{lb_path}: Label values error: all coordinates must be normalized"
470
-
471
- _, indices = np.unique(labels, axis=0, return_index=True)
472
- if len(indices) < len(labels): # duplicate row check
473
- labels = labels[indices] # remove duplicates
474
- msg += f"WARNING: {lb_path}: {len(labels) - len(indices)} duplicate labels removed"
475
- labels = labels.tolist()
476
- else:
477
- ne = 1 # label empty
478
- labels = []
479
- else:
480
- nm = 1 # label missing
481
- labels = []
482
-
483
- return img_path, labels, nc, nm, nf, ne, msg
484
- except Exception as e:
485
- nc = 1
486
- msg = f"WARNING: {lb_path}: ignoring invalid labels: {e}"
487
- return img_path, None, nc, nm, nf, ne, msg
488
-
489
- @staticmethod
490
- def generate_coco_format_labels(img_info, class_names, save_path):
491
- # for evaluation with pycocotools
492
- dataset = {"categories": [], "annotations": [], "images": []}
493
- for i, class_name in enumerate(class_names):
494
- dataset["categories"].append(
495
- {"id": i, "name": class_name, "supercategory": ""}
496
- )
497
-
498
- ann_id = 0
499
- LOGGER.info(f"Convert to COCO format")
500
- for i, (img_path, info) in enumerate(tqdm(img_info.items())):
501
- labels = info["labels"] if info["labels"] else []
502
- img_id = osp.splitext(osp.basename(img_path))[0]
503
- img_id = int(img_id) if img_id.isnumeric() else img_id
504
- img_w, img_h = info["shape"]
505
- dataset["images"].append(
506
- {
507
- "file_name": os.path.basename(img_path),
508
- "id": img_id,
509
- "width": img_w,
510
- "height": img_h,
511
- }
512
- )
513
- if labels:
514
- for label in labels:
515
- c, x, y, w, h = label[:5]
516
- # convert x,y,w,h to x1,y1,x2,y2
517
- x1 = (x - w / 2) * img_w
518
- y1 = (y - h / 2) * img_h
519
- x2 = (x + w / 2) * img_w
520
- y2 = (y + h / 2) * img_h
521
- # cls_id starts from 0
522
- cls_id = int(c)
523
- w = max(0, x2 - x1)
524
- h = max(0, y2 - y1)
525
- dataset["annotations"].append(
526
- {
527
- "area": h * w,
528
- "bbox": [x1, y1, w, h],
529
- "category_id": cls_id,
530
- "id": ann_id,
531
- "image_id": img_id,
532
- "iscrowd": 0,
533
- # mask
534
- "segmentation": [],
535
- }
536
- )
537
- ann_id += 1
538
-
539
- with open(save_path, "w") as f:
540
- json.dump(dataset, f)
541
- LOGGER.info(
542
-             f"Convert to COCO format finished. Results saved in {save_path}"
543
- )
544
-
545
- @staticmethod
546
- def get_hash(paths):
547
- """Get the hash value of paths"""
548
- assert isinstance(paths, list), "Only support list currently."
549
- h = hashlib.md5("".join(paths).encode())
550
- return h.hexdigest()
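
Among other things, generate_coco_format_labels above converts YOLO-normalized (class, cx, cy, w, h) rows into COCO-style [x1, y1, w, h] boxes in pixels. A self-contained sketch of that conversion, with made-up image size and label values:

    img_w, img_h = 640, 480                               # hypothetical image size
    cls_id, x, y, w, h = 0, 0.5, 0.5, 0.25, 0.5           # one YOLO-normalized label
    x1 = (x - w / 2) * img_w                              # 240.0
    y1 = (y - h / 2) * img_h                              # 120.0
    x2 = (x + w / 2) * img_w                              # 400.0
    y2 = (y + h / 2) * img_h                              # 360.0
    coco_box = [x1, y1, max(0, x2 - x1), max(0, y2 - y1)]
    print(int(cls_id), coco_box)                          # 0 [240.0, 120.0, 160.0, 240.0]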
yolov6/data/vis_dataset.py DELETED
@@ -1,57 +0,0 @@
1
- # coding=utf-8
2
- # Description: visualize yolo label image.
3
-
4
- import argparse
5
- import os
6
- import cv2
7
- import numpy as np
8
-
9
- IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo"]
10
-
11
- def main(args):
12
- img_dir, label_dir, class_names = args.img_dir, args.label_dir, args.class_names
13
-
14
- label_map = dict()
15
- for class_id, classname in enumerate(class_names):
16
- label_map[class_id] = classname
17
-
18
- for file in os.listdir(img_dir):
19
- if file.split('.')[-1] not in IMG_FORMATS:
20
- print(f'[Warning]: Non-image file {file}')
21
- continue
22
- img_path = os.path.join(img_dir, file)
23
- label_path = os.path.join(label_dir, file[: file.rindex('.')] + '.txt')
24
-
25
- try:
26
- img_data = cv2.imread(img_path)
27
- height, width, _ = img_data.shape
28
- color = [tuple(np.random.choice(range(256), size=3)) for i in class_names]
29
- thickness = 2
30
-
31
- with open(label_path, 'r') as f:
32
- for bbox in f:
33
- cls, x_c, y_c, w, h = [float(v) if i > 0 else int(v) for i, v in enumerate(bbox.split('\n')[0].split(' '))]
34
-
35
- x_tl = int((x_c - w / 2) * width)
36
- y_tl = int((y_c - h / 2) * height)
37
- cv2.rectangle(img_data, (x_tl, y_tl), (x_tl + int(w * width), y_tl + int(h * height)), tuple([int(x) for x in color[cls]]), thickness)
38
- cv2.putText(img_data, label_map[cls], (x_tl, y_tl - 10), cv2.FONT_HERSHEY_COMPLEX, 1, tuple([int(x) for x in color[cls]]), thickness)
39
-
40
- cv2.imshow('image', img_data)
41
- cv2.waitKey(0)
42
- except Exception as e:
43
- print(f'[Error]: {e} {img_path}')
44
- print('======All Done!======')
45
-
46
-
47
- if __name__ == '__main__':
48
- parser = argparse.ArgumentParser()
49
- parser.add_argument('--img_dir', default='VOCdevkit/voc_07_12/images')
50
- parser.add_argument('--label_dir', default='VOCdevkit/voc_07_12/labels')
51
- parser.add_argument('--class_names', default=['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
52
- 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'])
53
-
54
- args = parser.parse_args()
55
- print(args)
56
-
57
- main(args)
yolov6/data/voc2yolo.py DELETED
@@ -1,99 +0,0 @@
1
- import xml.etree.ElementTree as ET
2
- from tqdm import tqdm
3
- import os
4
- import shutil
5
- import argparse
6
-
7
- # VOC dataset (refer https://github.com/ultralytics/yolov5/blob/master/data/VOC.yaml)
8
- # VOC2007 trainval: 446MB, 5012 images
9
- # VOC2007 test: 438MB, 4953 images
10
- # VOC2012 trainval: 1.95GB, 17126 images
11
-
12
- VOC_NAMES = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
13
- 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
14
-
15
- def convert_label(path, lb_path, year, image_id):
16
- def convert_box(size, box):
17
- dw, dh = 1. / size[0], 1. / size[1]
18
- x, y, w, h = (box[0] + box[1]) / 2.0 - 1, (box[2] + box[3]) / 2.0 - 1, box[1] - box[0], box[3] - box[2]
19
- return x * dw, y * dh, w * dw, h * dh
20
- in_file = open(os.path.join(path, f'VOC{year}/Annotations/{image_id}.xml'))
21
- out_file = open(lb_path, 'w')
22
- tree = ET.parse(in_file)
23
- root = tree.getroot()
24
- size = root.find('size')
25
- w = int(size.find('width').text)
26
- h = int(size.find('height').text)
27
- for obj in root.iter('object'):
28
- cls = obj.find('name').text
29
- if cls in VOC_NAMES and not int(obj.find('difficult').text) == 1:
30
- xmlbox = obj.find('bndbox')
31
- bb = convert_box((w, h), [float(xmlbox.find(x).text) for x in ('xmin', 'xmax', 'ymin', 'ymax')])
32
- cls_id = VOC_NAMES.index(cls) # class id
33
- out_file.write(" ".join([str(a) for a in (cls_id, *bb)]) + '\n')
34
-
35
-
36
- def gen_voc07_12(voc_path):
37
- '''
38
- Generate voc07+12 setting dataset:
39
- train: # train images 16551 images
40
- - images/train2012
41
- - images/train2007
42
- - images/val2012
43
- - images/val2007
44
- val: # val images (relative to 'path') 4952 images
45
- - images/test2007
46
- '''
47
- dataset_root = os.path.join(voc_path, 'voc_07_12')
48
- if not os.path.exists(dataset_root):
49
- os.makedirs(dataset_root)
50
-
51
- dataset_settings = {'train': ['train2007', 'val2007', 'train2012', 'val2012'], 'val':['test2007']}
52
- for item in ['images', 'labels']:
53
- for data_type, data_list in dataset_settings.items():
54
- for data_name in data_list:
55
- ori_path = os.path.join(voc_path, item, data_name)
56
- new_path = os.path.join(dataset_root, item, data_type)
57
- if not os.path.exists(new_path):
58
- os.makedirs(new_path)
59
-
60
- print(f'[INFO]: Copying {ori_path} to {new_path}')
61
- for file in os.listdir(ori_path):
62
- shutil.copy(os.path.join(ori_path, file), new_path)
63
-
64
-
65
- def main(args):
66
- voc_path = args.voc_path
67
- for year, image_set in ('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test'):
68
- imgs_path = os.path.join(voc_path, 'images', f'{image_set}')
69
- lbs_path = os.path.join(voc_path, 'labels', f'{image_set}')
70
-
71
- try:
72
- with open(os.path.join(voc_path, f'VOC{year}/ImageSets/Main/{image_set}.txt'), 'r') as f:
73
- image_ids = f.read().strip().split()
74
- if not os.path.exists(imgs_path):
75
- os.makedirs(imgs_path)
76
- if not os.path.exists(lbs_path):
77
- os.makedirs(lbs_path)
78
-
79
- for id in tqdm(image_ids, desc=f'{image_set}{year}'):
80
- f = os.path.join(voc_path, f'VOC{year}/JPEGImages/{id}.jpg') # old img path
81
- lb_path = os.path.join(lbs_path, f'{id}.txt') # new label path
82
- convert_label(voc_path, lb_path, year, id) # convert labels to YOLO format
83
- if os.path.exists(f):
84
- shutil.move(f, imgs_path) # move image
85
- except Exception as e:
86
-             print(f'[Warning]: {e} {year}{image_set} conversion failed!')
87
-
88
- gen_voc07_12(voc_path)
89
-
90
-
91
-
92
- if __name__ == '__main__':
93
- parser = argparse.ArgumentParser()
94
- parser.add_argument('--voc_path', default='VOCdevkit')
95
-
96
- args = parser.parse_args()
97
- print(args)
98
-
99
- main(args)
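
convert_box above maps a VOC (xmin, xmax, ymin, ymax) box to normalized YOLO (cx, cy, w, h); note the unusual argument order. A worked, self-contained example with made-up numbers:

    img_w, img_h = 500, 375                               # hypothetical VOC image size
    xmin, xmax, ymin, ymax = 100.0, 300.0, 50.0, 250.0    # box in the order convert_box expects
    dw, dh = 1.0 / img_w, 1.0 / img_h
    cx = ((xmin + xmax) / 2.0 - 1) * dw                   # 0.398
    cy = ((ymin + ymax) / 2.0 - 1) * dh                   # ~0.397
    w = (xmax - xmin) * dw                                # 0.4
    h = (ymax - ymin) * dh                                # ~0.533
    print(round(cx, 3), round(cy, 3), round(w, 3), round(h, 3))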
yolov6/layers/common.py DELETED
@@ -1,501 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
-
4
- import warnings
5
- from pathlib import Path
6
-
7
- import numpy as np
8
- import torch
9
- import torch.nn as nn
10
- import torch.nn.functional as F
11
- from yolov6.layers.dbb_transforms import *
12
-
13
-
14
- class SiLU(nn.Module):
15
- '''Activation of SiLU'''
16
- @staticmethod
17
- def forward(x):
18
- return x * torch.sigmoid(x)
19
-
20
-
21
- class Conv(nn.Module):
22
- '''Normal Conv with SiLU activation'''
23
- def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, bias=False):
24
- super().__init__()
25
- padding = kernel_size // 2
26
- self.conv = nn.Conv2d(
27
- in_channels,
28
- out_channels,
29
- kernel_size=kernel_size,
30
- stride=stride,
31
- padding=padding,
32
- groups=groups,
33
- bias=bias,
34
- )
35
- self.bn = nn.BatchNorm2d(out_channels)
36
- self.act = nn.SiLU()
37
-
38
- def forward(self, x):
39
- return self.act(self.bn(self.conv(x)))
40
-
41
- def forward_fuse(self, x):
42
- return self.act(self.conv(x))
43
-
44
-
45
- class SimConv(nn.Module):
46
- '''Normal Conv with ReLU activation'''
47
- def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, bias=False):
48
- super().__init__()
49
- padding = kernel_size // 2
50
- self.conv = nn.Conv2d(
51
- in_channels,
52
- out_channels,
53
- kernel_size=kernel_size,
54
- stride=stride,
55
- padding=padding,
56
- groups=groups,
57
- bias=bias,
58
- )
59
- self.bn = nn.BatchNorm2d(out_channels)
60
- self.act = nn.ReLU()
61
-
62
- def forward(self, x):
63
- return self.act(self.bn(self.conv(x)))
64
-
65
- def forward_fuse(self, x):
66
- return self.act(self.conv(x))
67
-
68
-
69
- class SimSPPF(nn.Module):
70
- '''Simplified SPPF with ReLU activation'''
71
- def __init__(self, in_channels, out_channels, kernel_size=5):
72
- super().__init__()
73
- c_ = in_channels // 2 # hidden channels
74
- self.cv1 = SimConv(in_channels, c_, 1, 1)
75
- self.cv2 = SimConv(c_ * 4, out_channels, 1, 1)
76
- self.m = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=kernel_size // 2)
77
-
78
- def forward(self, x):
79
- x = self.cv1(x)
80
- with warnings.catch_warnings():
81
- warnings.simplefilter('ignore')
82
- y1 = self.m(x)
83
- y2 = self.m(y1)
84
- return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
85
-
86
-
87
- class Transpose(nn.Module):
88
- '''Normal Transpose, default for upsampling'''
89
- def __init__(self, in_channels, out_channels, kernel_size=2, stride=2):
90
- super().__init__()
91
- self.upsample_transpose = torch.nn.ConvTranspose2d(
92
- in_channels=in_channels,
93
- out_channels=out_channels,
94
- kernel_size=kernel_size,
95
- stride=stride,
96
- bias=True
97
- )
98
-
99
- def forward(self, x):
100
- return self.upsample_transpose(x)
101
-
102
-
103
- class Concat(nn.Module):
104
- def __init__(self, dimension=1):
105
- super().__init__()
106
- self.d = dimension
107
-
108
- def forward(self, x):
109
- return torch.cat(x, self.d)
110
-
111
-
112
- def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1):
113
- '''Basic cell for rep-style block, including conv and bn'''
114
- result = nn.Sequential()
115
- result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
116
- kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False))
117
- result.add_module('bn', nn.BatchNorm2d(num_features=out_channels))
118
- return result
119
-
120
-
121
- class RepBlock(nn.Module):
122
- '''
123
- RepBlock is a stage block with rep-style basic block
124
- '''
125
- def __init__(self, in_channels, out_channels, n=1):
126
- super().__init__()
127
- self.conv1 = RepVGGBlock(in_channels, out_channels)
128
- self.block = nn.Sequential(*(RepVGGBlock(out_channels, out_channels) for _ in range(n - 1))) if n > 1 else None
129
-
130
- def forward(self, x):
131
- x = self.conv1(x)
132
- if self.block is not None:
133
- x = self.block(x)
134
- return x
135
-
136
-
137
- class RepVGGBlock(nn.Module):
138
- '''RepVGGBlock is a basic rep-style block, including training and deploy status
139
- This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
140
- '''
141
- def __init__(self, in_channels, out_channels, kernel_size=3,
142
- stride=1, padding=1, dilation=1, groups=1, padding_mode='zeros', deploy=False, use_se=False):
143
- super(RepVGGBlock, self).__init__()
144
- """ Initialization of the class.
145
- Args:
146
- in_channels (int): Number of channels in the input image
147
- out_channels (int): Number of channels produced by the convolution
148
- kernel_size (int or tuple): Size of the convolving kernel
149
- stride (int or tuple, optional): Stride of the convolution. Default: 1
150
- padding (int or tuple, optional): Zero-padding added to both sides of
151
- the input. Default: 1
152
- dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
153
- groups (int, optional): Number of blocked connections from input
154
- channels to output channels. Default: 1
155
- padding_mode (string, optional): Default: 'zeros'
156
- deploy: Whether to be deploy status or training status. Default: False
157
- use_se: Whether to use se. Default: False
158
- """
159
- self.deploy = deploy
160
- self.groups = groups
161
- self.in_channels = in_channels
162
- self.out_channels = out_channels
163
-
164
- assert kernel_size == 3
165
- assert padding == 1
166
-
167
- padding_11 = padding - kernel_size // 2
168
-
169
- self.nonlinearity = nn.ReLU()
170
-
171
- if use_se:
172
- raise NotImplementedError("se block not supported yet")
173
- else:
174
- self.se = nn.Identity()
175
-
176
- if deploy:
177
- self.rbr_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride,
178
- padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode)
179
-
180
- else:
181
- self.rbr_identity = nn.BatchNorm2d(num_features=in_channels) if out_channels == in_channels and stride == 1 else None
182
- self.rbr_dense = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups)
183
- self.rbr_1x1 = conv_bn(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride, padding=padding_11, groups=groups)
184
-
185
- def forward(self, inputs):
186
- '''Forward process'''
187
- if hasattr(self, 'rbr_reparam'):
188
- return self.nonlinearity(self.se(self.rbr_reparam(inputs)))
189
-
190
- if self.rbr_identity is None:
191
- id_out = 0
192
- else:
193
- id_out = self.rbr_identity(inputs)
194
-
195
- return self.nonlinearity(self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out))
196
-
197
- def get_equivalent_kernel_bias(self):
198
- kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense)
199
- kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
200
- kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity)
201
- return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
202
-
203
- def _pad_1x1_to_3x3_tensor(self, kernel1x1):
204
- if kernel1x1 is None:
205
- return 0
206
- else:
207
- return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
208
-
209
- def _fuse_bn_tensor(self, branch):
210
- if branch is None:
211
- return 0, 0
212
- if isinstance(branch, nn.Sequential):
213
- kernel = branch.conv.weight
214
- running_mean = branch.bn.running_mean
215
- running_var = branch.bn.running_var
216
- gamma = branch.bn.weight
217
- beta = branch.bn.bias
218
- eps = branch.bn.eps
219
- else:
220
- assert isinstance(branch, nn.BatchNorm2d)
221
- if not hasattr(self, 'id_tensor'):
222
- input_dim = self.in_channels // self.groups
223
- kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32)
224
- for i in range(self.in_channels):
225
- kernel_value[i, i % input_dim, 1, 1] = 1
226
- self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
227
- kernel = self.id_tensor
228
- running_mean = branch.running_mean
229
- running_var = branch.running_var
230
- gamma = branch.weight
231
- beta = branch.bias
232
- eps = branch.eps
233
- std = (running_var + eps).sqrt()
234
- t = (gamma / std).reshape(-1, 1, 1, 1)
235
- return kernel * t, beta - running_mean * gamma / std
236
-
237
- def switch_to_deploy(self):
238
- if hasattr(self, 'rbr_reparam'):
239
- return
240
- kernel, bias = self.get_equivalent_kernel_bias()
241
- self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.conv.in_channels, out_channels=self.rbr_dense.conv.out_channels,
242
- kernel_size=self.rbr_dense.conv.kernel_size, stride=self.rbr_dense.conv.stride,
243
- padding=self.rbr_dense.conv.padding, dilation=self.rbr_dense.conv.dilation, groups=self.rbr_dense.conv.groups, bias=True)
244
- self.rbr_reparam.weight.data = kernel
245
- self.rbr_reparam.bias.data = bias
246
- for para in self.parameters():
247
- para.detach_()
248
- self.__delattr__('rbr_dense')
249
- self.__delattr__('rbr_1x1')
250
- if hasattr(self, 'rbr_identity'):
251
- self.__delattr__('rbr_identity')
252
- if hasattr(self, 'id_tensor'):
253
- self.__delattr__('id_tensor')
254
- self.deploy = True
255
-
256
-
257
- def conv_bn_v2(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1,
258
- padding_mode='zeros'):
259
- conv_layer = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
260
- stride=stride, padding=padding, dilation=dilation, groups=groups,
261
- bias=False, padding_mode=padding_mode)
262
- bn_layer = nn.BatchNorm2d(num_features=out_channels, affine=True)
263
- se = nn.Sequential()
264
- se.add_module('conv', conv_layer)
265
- se.add_module('bn', bn_layer)
266
- return se
267
-
268
-
269
- class IdentityBasedConv1x1(nn.Conv2d):
270
-
271
- def __init__(self, channels, groups=1):
272
- super(IdentityBasedConv1x1, self).__init__(in_channels=channels, out_channels=channels, kernel_size=1, stride=1, padding=0, groups=groups, bias=False)
273
-
274
- assert channels % groups == 0
275
- input_dim = channels // groups
276
- id_value = np.zeros((channels, input_dim, 1, 1))
277
- for i in range(channels):
278
- id_value[i, i % input_dim, 0, 0] = 1
279
- self.id_tensor = torch.from_numpy(id_value).type_as(self.weight)
280
- nn.init.zeros_(self.weight)
281
-
282
- def forward(self, input):
283
- kernel = self.weight + self.id_tensor.to(self.weight.device)
284
- result = F.conv2d(input, kernel, None, stride=1, padding=0, dilation=self.dilation, groups=self.groups)
285
- return result
286
-
287
- def get_actual_kernel(self):
288
- return self.weight + self.id_tensor.to(self.weight.device)
289
-
290
-
291
- class BNAndPadLayer(nn.Module):
292
- def __init__(self,
293
- pad_pixels,
294
- num_features,
295
- eps=1e-5,
296
- momentum=0.1,
297
- affine=True,
298
- track_running_stats=True):
299
- super(BNAndPadLayer, self).__init__()
300
- self.bn = nn.BatchNorm2d(num_features, eps, momentum, affine, track_running_stats)
301
- self.pad_pixels = pad_pixels
302
-
303
- def forward(self, input):
304
- output = self.bn(input)
305
- if self.pad_pixels > 0:
306
- if self.bn.affine:
307
- pad_values = self.bn.bias.detach() - self.bn.running_mean * self.bn.weight.detach() / torch.sqrt(self.bn.running_var + self.bn.eps)
308
- else:
309
- pad_values = - self.bn.running_mean / torch.sqrt(self.bn.running_var + self.bn.eps)
310
- output = F.pad(output, [self.pad_pixels] * 4)
311
- pad_values = pad_values.view(1, -1, 1, 1)
312
- output[:, :, 0:self.pad_pixels, :] = pad_values
313
- output[:, :, -self.pad_pixels:, :] = pad_values
314
- output[:, :, :, 0:self.pad_pixels] = pad_values
315
- output[:, :, :, -self.pad_pixels:] = pad_values
316
- return output
317
-
318
- @property
319
- def bn_weight(self):
320
- return self.bn.weight
321
-
322
- @property
323
- def bn_bias(self):
324
- return self.bn.bias
325
-
326
- @property
327
- def running_mean(self):
328
- return self.bn.running_mean
329
-
330
- @property
331
- def running_var(self):
332
- return self.bn.running_var
333
-
334
- @property
335
- def eps(self):
336
- return self.bn.eps
337
-
338
-
339
- class DBBBlock(nn.Module):
340
- '''
341
- RepBlock is a stage block with rep-style basic block
342
- '''
343
- def __init__(self, in_channels, out_channels, n=1):
344
- super().__init__()
345
- self.conv1 = DiverseBranchBlock(in_channels, out_channels)
346
- self.block = nn.Sequential(*(DiverseBranchBlock(out_channels, out_channels) for _ in range(n - 1))) if n > 1 else None
347
-
348
- def forward(self, x):
349
- x = self.conv1(x)
350
- if self.block is not None:
351
- x = self.block(x)
352
- return x
353
-
354
-
355
- class DiverseBranchBlock(nn.Module):
356
-
357
- def __init__(self, in_channels, out_channels, kernel_size=3,
358
- stride=1, padding=1, dilation=1, groups=1,
359
- internal_channels_1x1_3x3=None,
360
- deploy=False, nonlinear=nn.ReLU(), single_init=False):
361
- super(DiverseBranchBlock, self).__init__()
362
- self.deploy = deploy
363
-
364
- if nonlinear is None:
365
- self.nonlinear = nn.Identity()
366
- else:
367
- self.nonlinear = nonlinear
368
-
369
- self.kernel_size = kernel_size
370
- self.out_channels = out_channels
371
- self.groups = groups
372
- assert padding == kernel_size // 2
373
-
374
- if deploy:
375
- self.dbb_reparam = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride,
376
- padding=padding, dilation=dilation, groups=groups, bias=True)
377
-
378
- else:
379
-
380
- self.dbb_origin = conv_bn_v2(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups)
381
-
382
- self.dbb_avg = nn.Sequential()
383
- if groups < out_channels:
384
- self.dbb_avg.add_module('conv',
385
- nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1,
386
- stride=1, padding=0, groups=groups, bias=False))
387
- self.dbb_avg.add_module('bn', BNAndPadLayer(pad_pixels=padding, num_features=out_channels))
388
- self.dbb_avg.add_module('avg', nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=0))
389
- self.dbb_1x1 = conv_bn_v2(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=stride,
390
- padding=0, groups=groups)
391
- else:
392
- self.dbb_avg.add_module('avg', nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=padding))
393
-
394
- self.dbb_avg.add_module('avgbn', nn.BatchNorm2d(out_channels))
395
-
396
- if internal_channels_1x1_3x3 is None:
397
- internal_channels_1x1_3x3 = in_channels if groups < out_channels else 2 * in_channels # For mobilenet, it is better to have 2X internal channels
398
-
399
- self.dbb_1x1_kxk = nn.Sequential()
400
- if internal_channels_1x1_3x3 == in_channels:
401
- self.dbb_1x1_kxk.add_module('idconv1', IdentityBasedConv1x1(channels=in_channels, groups=groups))
402
- else:
403
- self.dbb_1x1_kxk.add_module('conv1', nn.Conv2d(in_channels=in_channels, out_channels=internal_channels_1x1_3x3,
404
- kernel_size=1, stride=1, padding=0, groups=groups, bias=False))
405
- self.dbb_1x1_kxk.add_module('bn1', BNAndPadLayer(pad_pixels=padding, num_features=internal_channels_1x1_3x3, affine=True))
406
- self.dbb_1x1_kxk.add_module('conv2', nn.Conv2d(in_channels=internal_channels_1x1_3x3, out_channels=out_channels,
407
- kernel_size=kernel_size, stride=stride, padding=0, groups=groups, bias=False))
408
- self.dbb_1x1_kxk.add_module('bn2', nn.BatchNorm2d(out_channels))
409
-
410
- # The experiments reported in the paper used the default initialization of bn.weight (all as 1). But changing the initialization may be useful in some cases.
411
- if single_init:
412
- # Initialize the bn.weight of dbb_origin as 1 and others as 0. This is not the default setting.
413
- self.single_init()
414
-
415
- def get_equivalent_kernel_bias(self):
416
- k_origin, b_origin = transI_fusebn(self.dbb_origin.conv.weight, self.dbb_origin.bn)
417
-
418
- if hasattr(self, 'dbb_1x1'):
419
- k_1x1, b_1x1 = transI_fusebn(self.dbb_1x1.conv.weight, self.dbb_1x1.bn)
420
- k_1x1 = transVI_multiscale(k_1x1, self.kernel_size)
421
- else:
422
- k_1x1, b_1x1 = 0, 0
423
-
424
- if hasattr(self.dbb_1x1_kxk, 'idconv1'):
425
- k_1x1_kxk_first = self.dbb_1x1_kxk.idconv1.get_actual_kernel()
426
- else:
427
- k_1x1_kxk_first = self.dbb_1x1_kxk.conv1.weight
428
- k_1x1_kxk_first, b_1x1_kxk_first = transI_fusebn(k_1x1_kxk_first, self.dbb_1x1_kxk.bn1)
429
- k_1x1_kxk_second, b_1x1_kxk_second = transI_fusebn(self.dbb_1x1_kxk.conv2.weight, self.dbb_1x1_kxk.bn2)
430
- k_1x1_kxk_merged, b_1x1_kxk_merged = transIII_1x1_kxk(k_1x1_kxk_first, b_1x1_kxk_first, k_1x1_kxk_second, b_1x1_kxk_second, groups=self.groups)
431
-
432
- k_avg = transV_avg(self.out_channels, self.kernel_size, self.groups)
433
- k_1x1_avg_second, b_1x1_avg_second = transI_fusebn(k_avg.to(self.dbb_avg.avgbn.weight.device), self.dbb_avg.avgbn)
434
- if hasattr(self.dbb_avg, 'conv'):
435
- k_1x1_avg_first, b_1x1_avg_first = transI_fusebn(self.dbb_avg.conv.weight, self.dbb_avg.bn)
436
- k_1x1_avg_merged, b_1x1_avg_merged = transIII_1x1_kxk(k_1x1_avg_first, b_1x1_avg_first, k_1x1_avg_second, b_1x1_avg_second, groups=self.groups)
437
- else:
438
- k_1x1_avg_merged, b_1x1_avg_merged = k_1x1_avg_second, b_1x1_avg_second
439
-
440
- return transII_addbranch((k_origin, k_1x1, k_1x1_kxk_merged, k_1x1_avg_merged), (b_origin, b_1x1, b_1x1_kxk_merged, b_1x1_avg_merged))
441
-
442
- def switch_to_deploy(self):
443
- if hasattr(self, 'dbb_reparam'):
444
- return
445
- kernel, bias = self.get_equivalent_kernel_bias()
446
- self.dbb_reparam = nn.Conv2d(in_channels=self.dbb_origin.conv.in_channels, out_channels=self.dbb_origin.conv.out_channels,
447
- kernel_size=self.dbb_origin.conv.kernel_size, stride=self.dbb_origin.conv.stride,
448
- padding=self.dbb_origin.conv.padding, dilation=self.dbb_origin.conv.dilation, groups=self.dbb_origin.conv.groups, bias=True)
449
- self.dbb_reparam.weight.data = kernel
450
- self.dbb_reparam.bias.data = bias
451
- for para in self.parameters():
452
- para.detach_()
453
- self.__delattr__('dbb_origin')
454
- self.__delattr__('dbb_avg')
455
- if hasattr(self, 'dbb_1x1'):
456
- self.__delattr__('dbb_1x1')
457
- self.__delattr__('dbb_1x1_kxk')
458
-
459
- def forward(self, inputs):
460
-
461
- if hasattr(self, 'dbb_reparam'):
462
- return self.nonlinear(self.dbb_reparam(inputs))
463
-
464
- out = self.dbb_origin(inputs)
465
- if hasattr(self, 'dbb_1x1'):
466
- out += self.dbb_1x1(inputs)
467
- out += self.dbb_avg(inputs)
468
- out += self.dbb_1x1_kxk(inputs)
469
- return self.nonlinear(out)
470
-
471
- def init_gamma(self, gamma_value):
472
- if hasattr(self, "dbb_origin"):
473
- torch.nn.init.constant_(self.dbb_origin.bn.weight, gamma_value)
474
- if hasattr(self, "dbb_1x1"):
475
- torch.nn.init.constant_(self.dbb_1x1.bn.weight, gamma_value)
476
- if hasattr(self, "dbb_avg"):
477
- torch.nn.init.constant_(self.dbb_avg.avgbn.weight, gamma_value)
478
- if hasattr(self, "dbb_1x1_kxk"):
479
- torch.nn.init.constant_(self.dbb_1x1_kxk.bn2.weight, gamma_value)
480
-
481
- def single_init(self):
482
- self.init_gamma(0.0)
483
- if hasattr(self, "dbb_origin"):
484
- torch.nn.init.constant_(self.dbb_origin.bn.weight, 1.0)
485
-
486
-
487
- class DetectBackend(nn.Module):
488
- def __init__(self, weights='yolov6s.pt', device=None, dnn=True):
489
-
490
- super().__init__()
491
- assert isinstance(weights, str) and Path(weights).suffix == '.pt', f'{Path(weights).suffix} format is not supported.'
492
- from yolov6.utils.checkpoint import load_checkpoint
493
- model = load_checkpoint(weights, map_location=device)
494
- stride = int(model.stride.max())
495
- self.__dict__.update(locals()) # assign all variables to self
496
-
497
- def forward(self, im, val=False):
498
- y = self.model(im)
499
- if isinstance(y, np.ndarray):
500
- y = torch.tensor(y, device=self.device)
501
- return y
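
The reparameterisation in RepVGGBlock (_fuse_bn_tensor / get_equivalent_kernel_bias / switch_to_deploy) rests on folding a BatchNorm into the preceding convolution. A self-contained numerical check of that folding, with arbitrary shapes and random running statistics:

    import torch
    import torch.nn as nn

    # A conv (no bias) followed by a BatchNorm in eval mode, as in a rep-style branch.
    conv = nn.Conv2d(8, 16, 3, padding=1, bias=False)
    bn = nn.BatchNorm2d(16).eval()
    bn.running_mean.uniform_(-1.0, 1.0)   # pretend these were learned statistics
    bn.running_var.uniform_(0.5, 2.0)

    fused = nn.Conv2d(8, 16, 3, padding=1, bias=True)
    with torch.no_grad():
        std = (bn.running_var + bn.eps).sqrt()
        fused.weight.copy_(conv.weight * (bn.weight / std).reshape(-1, 1, 1, 1))
        fused.bias.copy_(bn.bias - bn.running_mean * bn.weight / std)
        x = torch.randn(1, 8, 32, 32)
        print(torch.allclose(bn(conv(x)), fused(x), atol=1e-6))   # True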
yolov6/layers/dbb_transforms.py DELETED
@@ -1,50 +0,0 @@
1
- import torch
2
- import numpy as np
3
- import torch.nn.functional as F
4
-
5
-
6
- def transI_fusebn(kernel, bn):
7
- gamma = bn.weight
8
- std = (bn.running_var + bn.eps).sqrt()
9
- return kernel * ((gamma / std).reshape(-1, 1, 1, 1)), bn.bias - bn.running_mean * gamma / std
10
-
11
-
12
- def transII_addbranch(kernels, biases):
13
- return sum(kernels), sum(biases)
14
-
15
-
16
- def transIII_1x1_kxk(k1, b1, k2, b2, groups):
17
- if groups == 1:
18
- k = F.conv2d(k2, k1.permute(1, 0, 2, 3)) #
19
- b_hat = (k2 * b1.reshape(1, -1, 1, 1)).sum((1, 2, 3))
20
- else:
21
- k_slices = []
22
- b_slices = []
23
- k1_T = k1.permute(1, 0, 2, 3)
24
- k1_group_width = k1.size(0) // groups
25
- k2_group_width = k2.size(0) // groups
26
- for g in range(groups):
27
- k1_T_slice = k1_T[:, g*k1_group_width:(g+1)*k1_group_width, :, :]
28
- k2_slice = k2[g*k2_group_width:(g+1)*k2_group_width, :, :, :]
29
- k_slices.append(F.conv2d(k2_slice, k1_T_slice))
30
- b_slices.append((k2_slice * b1[g*k1_group_width:(g+1)*k1_group_width].reshape(1, -1, 1, 1)).sum((1, 2, 3)))
31
- k, b_hat = transIV_depthconcat(k_slices, b_slices)
32
- return k, b_hat + b2
33
-
34
-
35
- def transIV_depthconcat(kernels, biases):
36
- return torch.cat(kernels, dim=0), torch.cat(biases)
37
-
38
-
39
- def transV_avg(channels, kernel_size, groups):
40
- input_dim = channels // groups
41
- k = torch.zeros((channels, input_dim, kernel_size, kernel_size))
42
- k[np.arange(channels), np.tile(np.arange(input_dim), groups), :, :] = 1.0 / kernel_size ** 2
43
- return k
44
-
45
-
46
- # This has not been tested with non-square kernels (kernel.size(2) != kernel.size(3)) nor even-size kernels
47
- def transVI_multiscale(kernel, target_kernel_size):
48
- H_pixels_to_pad = (target_kernel_size - kernel.size(2)) // 2
49
- W_pixels_to_pad = (target_kernel_size - kernel.size(3)) // 2
50
- return F.pad(kernel, [H_pixels_to_pad, H_pixels_to_pad, W_pixels_to_pad, W_pixels_to_pad])
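
transV_avg above expresses average pooling as a convolution whose kernel is uniformly 1/k^2. A self-contained check of that equivalence on random data (the sizes are arbitrary):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 3, 8, 8)
    k = 3
    kernel = torch.full((3, 1, k, k), 1.0 / k ** 2)             # one uniform kernel per channel
    avg = F.avg_pool2d(x, kernel_size=k, stride=1, padding=0)
    conv = F.conv2d(x, kernel, stride=1, padding=0, groups=3)   # depthwise convolution
    print(torch.allclose(avg, conv, atol=1e-6))                 # True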
yolov6/models/efficientrep.py DELETED
@@ -1,102 +0,0 @@
1
- from torch import nn
2
- from yolov6.layers.common import RepVGGBlock, RepBlock, SimSPPF
3
-
4
-
5
- class EfficientRep(nn.Module):
6
- '''EfficientRep Backbone
7
- EfficientRep is handcrafted by hardware-aware neural network design.
8
-     With rep-style structure, EfficientRep is friendly to high-computation hardware (e.g. GPU).
9
- '''
10
-
11
- def __init__(
12
- self,
13
- in_channels=3,
14
- channels_list=None,
15
- num_repeats=None,
16
- ):
17
- super().__init__()
18
-
19
- assert channels_list is not None
20
- assert num_repeats is not None
21
-
22
- self.stem = RepVGGBlock(
23
- in_channels=in_channels,
24
- out_channels=channels_list[0],
25
- kernel_size=3,
26
- stride=2
27
- )
28
-
29
- self.ERBlock_2 = nn.Sequential(
30
- RepVGGBlock(
31
- in_channels=channels_list[0],
32
- out_channels=channels_list[1],
33
- kernel_size=3,
34
- stride=2
35
- ),
36
- RepBlock(
37
- in_channels=channels_list[1],
38
- out_channels=channels_list[1],
39
- n=num_repeats[1]
40
- )
41
- )
42
-
43
- self.ERBlock_3 = nn.Sequential(
44
- RepVGGBlock(
45
- in_channels=channels_list[1],
46
- out_channels=channels_list[2],
47
- kernel_size=3,
48
- stride=2
49
- ),
50
- RepBlock(
51
- in_channels=channels_list[2],
52
- out_channels=channels_list[2],
53
- n=num_repeats[2]
54
- )
55
- )
56
-
57
- self.ERBlock_4 = nn.Sequential(
58
- RepVGGBlock(
59
- in_channels=channels_list[2],
60
- out_channels=channels_list[3],
61
- kernel_size=3,
62
- stride=2
63
- ),
64
- RepBlock(
65
- in_channels=channels_list[3],
66
- out_channels=channels_list[3],
67
- n=num_repeats[3]
68
- )
69
- )
70
-
71
- self.ERBlock_5 = nn.Sequential(
72
- RepVGGBlock(
73
- in_channels=channels_list[3],
74
- out_channels=channels_list[4],
75
- kernel_size=3,
76
- stride=2,
77
- ),
78
- RepBlock(
79
- in_channels=channels_list[4],
80
- out_channels=channels_list[4],
81
- n=num_repeats[4]
82
- ),
83
- SimSPPF(
84
- in_channels=channels_list[4],
85
- out_channels=channels_list[4],
86
- kernel_size=5
87
- )
88
- )
89
-
90
- def forward(self, x):
91
-
92
- outputs = []
93
- x = self.stem(x)
94
- x = self.ERBlock_2(x)
95
- x = self.ERBlock_3(x)
96
- outputs.append(x)
97
- x = self.ERBlock_4(x)
98
- outputs.append(x)
99
- x = self.ERBlock_5(x)
100
- outputs.append(x)
101
-
102
- return tuple(outputs)
 
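A hedged forward-pass sketch for the backbone deleted above. It assumes a YOLOv6 checkout matching this code is still on the import path; the width and depth lists below are illustrative values, not necessarily the ones this Space used:

```python
import torch
from yolov6.models.efficientrep import EfficientRep   # assumes a matching YOLOv6 checkout is installed

channels_list = [64, 128, 256, 512, 1024]   # illustrative widths for the stem and ERBlock_2..5
num_repeats = [1, 6, 12, 18, 6]             # illustrative depths
backbone = EfficientRep(in_channels=3, channels_list=channels_list, num_repeats=num_repeats)

x = torch.randn(1, 3, 640, 640)
p3, p4, p5 = backbone(x)                    # stride-8, stride-16 and stride-32 feature maps
print(p3.shape, p4.shape, p5.shape)         # (1, 256, 80, 80) (1, 512, 40, 40) (1, 1024, 20, 20)
```
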
 
yolov6/models/effidehead.py DELETED
@@ -1,211 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import math
4
- from yolov6.layers.common import *
5
-
6
-
7
- class Detect(nn.Module):
8
- '''Efficient Decoupled Head
9
- With hardware-aware design, the decoupled head is optimized with
10
- hybrid-channel methods.
11
- '''
12
- def __init__(self, num_classes=80, anchors=1, num_layers=3, inplace=True, head_layers=None): # detection layer
13
- super().__init__()
14
- assert head_layers is not None
15
- self.nc = num_classes # number of classes
16
- self.no = num_classes + 5 # number of outputs per anchor
17
- self.nl = num_layers # number of detection layers
18
- if isinstance(anchors, (list, tuple)):
19
- self.na = len(anchors[0]) // 2
20
- else:
21
- self.na = anchors
22
- self.anchors = anchors
23
- self.grid = [torch.zeros(1)] * num_layers
24
- self.prior_prob = 1e-2
25
- self.inplace = inplace
26
- stride = [8, 16, 32] # strides computed during build
27
- self.stride = torch.tensor(stride)
28
-
29
- # Init decouple head
30
- self.cls_convs = nn.ModuleList()
31
- self.reg_convs = nn.ModuleList()
32
- self.cls_preds = nn.ModuleList()
33
- self.reg_preds = nn.ModuleList()
34
- self.obj_preds = nn.ModuleList()
35
- self.stems = nn.ModuleList()
36
-
37
- # Efficient decoupled head layers
38
- for i in range(num_layers):
39
- idx = i*6
40
- self.stems.append(head_layers[idx])
41
- self.cls_convs.append(head_layers[idx+1])
42
- self.reg_convs.append(head_layers[idx+2])
43
- self.cls_preds.append(head_layers[idx+3])
44
- self.reg_preds.append(head_layers[idx+4])
45
- self.obj_preds.append(head_layers[idx+5])
46
-
47
- def initialize_biases(self):
48
- for conv in self.cls_preds:
49
- b = conv.bias.view(self.na, -1)
50
- b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob))
51
- conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
52
- for conv in self.obj_preds:
53
- b = conv.bias.view(self.na, -1)
54
- b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob))
55
- conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
56
-
57
- def forward(self, x):
58
- z = []
59
- for i in range(self.nl):
60
- x[i] = self.stems[i](x[i])
61
- cls_x = x[i]
62
- reg_x = x[i]
63
- cls_feat = self.cls_convs[i](cls_x)
64
- cls_output = self.cls_preds[i](cls_feat)
65
- reg_feat = self.reg_convs[i](reg_x)
66
- reg_output = self.reg_preds[i](reg_feat)
67
- obj_output = self.obj_preds[i](reg_feat)
68
- if self.training:
69
- x[i] = torch.cat([reg_output, obj_output, cls_output], 1)
70
- bs, _, ny, nx = x[i].shape
71
- x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
72
- else:
73
- y = torch.cat([reg_output, obj_output.sigmoid(), cls_output.sigmoid()], 1)
74
- bs, _, ny, nx = y.shape
75
- y = y.view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
76
- if self.grid[i].shape[2:4] != y.shape[2:4]:
77
- d = self.stride.device
78
- yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)])
79
- self.grid[i] = torch.stack((xv, yv), 2).view(1, self.na, ny, nx, 2).float()
80
- if self.inplace:
81
- y[..., 0:2] = (y[..., 0:2] + self.grid[i]) * self.stride[i] # xy
82
- y[..., 2:4] = torch.exp(y[..., 2:4]) * self.stride[i] # wh
83
- else:
84
- xy = (y[..., 0:2] + self.grid[i]) * self.stride[i] # xy
85
- wh = torch.exp(y[..., 2:4]) * self.stride[i] # wh
86
- y = torch.cat((xy, wh, y[..., 4:]), -1)
87
- z.append(y.view(bs, -1, self.no))
88
- return x if self.training else torch.cat(z, 1)
89
-
90
-
91
- def build_effidehead_layer(channels_list, num_anchors, num_classes):
92
- head_layers = nn.Sequential(
93
- # stem0
94
- Conv(
95
- in_channels=channels_list[6],
96
- out_channels=channels_list[6],
97
- kernel_size=1,
98
- stride=1
99
- ),
100
- # cls_conv0
101
- Conv(
102
- in_channels=channels_list[6],
103
- out_channels=channels_list[6],
104
- kernel_size=3,
105
- stride=1
106
- ),
107
- # reg_conv0
108
- Conv(
109
- in_channels=channels_list[6],
110
- out_channels=channels_list[6],
111
- kernel_size=3,
112
- stride=1
113
- ),
114
- # cls_pred0
115
- nn.Conv2d(
116
- in_channels=channels_list[6],
117
- out_channels=num_classes * num_anchors,
118
- kernel_size=1
119
- ),
120
- # reg_pred0
121
- nn.Conv2d(
122
- in_channels=channels_list[6],
123
- out_channels=4 * num_anchors,
124
- kernel_size=1
125
- ),
126
- # obj_pred0
127
- nn.Conv2d(
128
- in_channels=channels_list[6],
129
- out_channels=1 * num_anchors,
130
- kernel_size=1
131
- ),
132
- # stem1
133
- Conv(
134
- in_channels=channels_list[8],
135
- out_channels=channels_list[8],
136
- kernel_size=1,
137
- stride=1
138
- ),
139
- # cls_conv1
140
- Conv(
141
- in_channels=channels_list[8],
142
- out_channels=channels_list[8],
143
- kernel_size=3,
144
- stride=1
145
- ),
146
- # reg_conv1
147
- Conv(
148
- in_channels=channels_list[8],
149
- out_channels=channels_list[8],
150
- kernel_size=3,
151
- stride=1
152
- ),
153
- # cls_pred1
154
- nn.Conv2d(
155
- in_channels=channels_list[8],
156
- out_channels=num_classes * num_anchors,
157
- kernel_size=1
158
- ),
159
- # reg_pred1
160
- nn.Conv2d(
161
- in_channels=channels_list[8],
162
- out_channels=4 * num_anchors,
163
- kernel_size=1
164
- ),
165
- # obj_pred1
166
- nn.Conv2d(
167
- in_channels=channels_list[8],
168
- out_channels=1 * num_anchors,
169
- kernel_size=1
170
- ),
171
- # stem2
172
- Conv(
173
- in_channels=channels_list[10],
174
- out_channels=channels_list[10],
175
- kernel_size=1,
176
- stride=1
177
- ),
178
- # cls_conv2
179
- Conv(
180
- in_channels=channels_list[10],
181
- out_channels=channels_list[10],
182
- kernel_size=3,
183
- stride=1
184
- ),
185
- # reg_conv2
186
- Conv(
187
- in_channels=channels_list[10],
188
- out_channels=channels_list[10],
189
- kernel_size=3,
190
- stride=1
191
- ),
192
- # cls_pred2
193
- nn.Conv2d(
194
- in_channels=channels_list[10],
195
- out_channels=num_classes * num_anchors,
196
- kernel_size=1
197
- ),
198
- # reg_pred2
199
- nn.Conv2d(
200
- in_channels=channels_list[10],
201
- out_channels=4 * num_anchors,
202
- kernel_size=1
203
- ),
204
- # obj_pred2
205
- nn.Conv2d(
206
- in_channels=channels_list[10],
207
- out_channels=1 * num_anchors,
208
- kernel_size=1
209
- )
210
- )
211
- return head_layers
 
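A hedged sketch of assembling and running the decoupled head deleted above, again assuming a matching YOLOv6 checkout is importable. The channel list is a hypothetical fused backbone+neck width list; build_effidehead_layer only reads indices 6, 8 and 10 from it:

```python
import torch
from yolov6.models.effidehead import Detect, build_effidehead_layer

channels_list = [64, 128, 256, 512, 1024, 256, 128, 128, 256, 256, 512]   # hypothetical widths
head_layers = build_effidehead_layer(channels_list, num_anchors=1, num_classes=80)
head = Detect(num_classes=80, anchors=1, num_layers=3, head_layers=head_layers)
head.initialize_biases()
head.eval()                                   # inference branch: decoded, concatenated predictions

feats = [torch.randn(1, 128, 80, 80),         # P3, width channels_list[6]
         torch.randn(1, 256, 40, 40),         # P4, width channels_list[8]
         torch.randn(1, 512, 20, 20)]         # P5, width channels_list[10]
with torch.no_grad():
    preds = head(feats)
print(preds.shape)                            # (1, 8400, 85): 80*80 + 40*40 + 20*20 anchors, 4 + 1 + 80 outputs
```
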
 
yolov6/models/end2end.py DELETED
@@ -1,147 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import random
4
-
5
- class ORT_NMS(torch.autograd.Function):
6
-
7
- @staticmethod
8
- def forward(ctx,
9
- boxes,
10
- scores,
11
- max_output_boxes_per_class=torch.tensor([100]),
12
- iou_threshold=torch.tensor([0.45]),
13
- score_threshold=torch.tensor([0.25])):
14
- device = boxes.device
15
- batch = scores.shape[0]
16
- num_det = random.randint(0, 100)
17
- batches = torch.randint(0, batch, (num_det,)).sort()[0].to(device)
18
- idxs = torch.arange(100, 100 + num_det).to(device)
19
- zeros = torch.zeros((num_det,), dtype=torch.int64).to(device)
20
- selected_indices = torch.cat([batches[None], zeros[None], idxs[None]], 0).T.contiguous()
21
- selected_indices = selected_indices.to(torch.int64)
22
- return selected_indices
23
-
24
- @staticmethod
25
- def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold):
26
- return g.op("NonMaxSuppression", boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold)
27
-
28
- class TRT_NMS(torch.autograd.Function):
29
- @staticmethod
30
- def forward(
31
- ctx,
32
- boxes,
33
- scores,
34
- background_class=-1,
35
- box_coding=1,
36
- iou_threshold=0.45,
37
- max_output_boxes=100,
38
- plugin_version="1",
39
- score_activation=0,
40
- score_threshold=0.25,
41
- ):
42
- batch_size, num_boxes, num_classes = scores.shape
43
- num_det = torch.randint(0, max_output_boxes, (batch_size, 1), dtype=torch.int32)
44
- det_boxes = torch.randn(batch_size, max_output_boxes, 4)
45
- det_scores = torch.randn(batch_size, max_output_boxes)
46
- det_classes = torch.randint(0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32)
47
-
48
- return num_det, det_boxes, det_scores, det_classes
49
-
50
- @staticmethod
51
- def symbolic(g,
52
- boxes,
53
- scores,
54
- background_class=-1,
55
- box_coding=1,
56
- iou_threshold=0.45,
57
- max_output_boxes=100,
58
- plugin_version="1",
59
- score_activation=0,
60
- score_threshold=0.25):
61
- out = g.op("TRT::EfficientNMS_TRT",
62
- boxes,
63
- scores,
64
- background_class_i=background_class,
65
- box_coding_i=box_coding,
66
- iou_threshold_f=iou_threshold,
67
- max_output_boxes_i=max_output_boxes,
68
- plugin_version_s=plugin_version,
69
- score_activation_i=score_activation,
70
- score_threshold_f=score_threshold,
71
- outputs=4)
72
- nums, boxes, scores, classes = out
73
- return nums,boxes,scores,classes
74
-
75
-
76
-
77
- class ONNX_ORT(nn.Module):
78
-
79
- def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=640, device=None):
80
- super().__init__()
81
- self.device = device if device else torch.device("cpu")
82
- self.max_obj = torch.tensor([max_obj]).to(device)
83
- self.iou_threshold = torch.tensor([iou_thres]).to(device)
84
- self.score_threshold = torch.tensor([score_thres]).to(device)
85
- self.max_wh = max_wh
86
- self.convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]],
87
- dtype=torch.float32,
88
- device=self.device)
89
-
90
- def forward(self, x):
91
- box = x[:, :, :4]
92
- conf = x[:, :, 4:5]
93
- score = x[:, :, 5:]
94
- score *= conf
95
- box @= self.convert_matrix
96
- objScore, objCls = score.max(2, keepdim=True)
97
- dis = objCls.float() * self.max_wh
98
- nmsbox = box + dis
99
- objScore1 = objScore.transpose(1, 2).contiguous()
100
- selected_indices = ORT_NMS.apply(nmsbox, objScore1, self.max_obj, self.iou_threshold, self.score_threshold)
101
- X, Y = selected_indices[:, 0], selected_indices[:, 2]
102
- resBoxes = box[X, Y, :]
103
- resClasses = objCls[X, Y, :].float()
104
- resScores = objScore[X, Y, :]
105
- X = X.unsqueeze(1).float()
106
- return torch.cat([X, resBoxes, resClasses, resScores], 1)
107
-
108
- class ONNX_TRT(nn.Module):
109
-
110
- def __init__(self, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None ,device=None):
111
- super().__init__()
112
- assert max_wh is None
113
- self.device = device if device else torch.device('cpu')
114
- self.background_class = -1,
115
- self.box_coding = 1,
116
- self.iou_threshold = iou_thres
117
- self.max_obj = max_obj
118
- self.plugin_version = '1'
119
- self.score_activation = 0
120
- self.score_threshold = score_thres
121
-
122
- def forward(self, x):
123
- box = x[:, :, :4]
124
- conf = x[:, :, 4:5]
125
- score = x[:, :, 5:]
126
- score *= conf
127
- num_det, det_boxes, det_scores, det_classes = TRT_NMS.apply(box, score, self.background_class, self.box_coding,
128
- self.iou_threshold, self.max_obj,
129
- self.plugin_version, self.score_activation,
130
- self.score_threshold)
131
- return num_det, det_boxes, det_scores, det_classes
132
-
133
-
134
- class End2End(nn.Module):
135
-
136
- def __init__(self, model, max_obj=100, iou_thres=0.45, score_thres=0.25, max_wh=None, device=None):
137
- super().__init__()
138
- device = device if device else torch.device('cpu')
139
- self.model = model.to(device)
140
- self.patch_model = ONNX_TRT if max_wh is None else ONNX_ORT
141
- self.end2end = self.patch_model(max_obj, iou_thres, score_thres, max_wh, device)
142
- self.end2end.eval()
143
-
144
- def forward(self, x):
145
- x = self.model(x)
146
- x = self.end2end(x)
147
- return x
 
 
yolov6/models/loss.py DELETED
@@ -1,411 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
-
4
- # The code is based on
5
- # https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/models/yolo_head.py
6
- # Copyright (c) Megvii, Inc. and its affiliates.
7
-
8
- import torch
9
- import torch.nn as nn
10
- import numpy as np
11
- import torch.nn.functional as F
12
- from yolov6.utils.figure_iou import IOUloss, pairwise_bbox_iou
13
-
14
-
15
- class ComputeLoss:
16
- '''Loss computation func.
17
- This func contains SimOTA and siou loss.
18
- '''
19
- def __init__(self,
20
- reg_weight=5.0,
21
- iou_weight=3.0,
22
- cls_weight=1.0,
23
- center_radius=2.5,
24
- eps=1e-7,
25
- in_channels=[256, 512, 1024],
26
- strides=[8, 16, 32],
27
- n_anchors=1,
28
- iou_type='ciou'
29
- ):
30
-
31
- self.reg_weight = reg_weight
32
- self.iou_weight = iou_weight
33
- self.cls_weight = cls_weight
34
-
35
- self.center_radius = center_radius
36
- self.eps = eps
37
- self.n_anchors = n_anchors
38
- self.strides = strides
39
- self.grids = [torch.zeros(1)] * len(in_channels)
40
-
41
- # Define criteria
42
- self.l1_loss = nn.L1Loss(reduction="none")
43
- self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none")
44
- self.iou_loss = IOUloss(iou_type=iou_type, reduction="none")
45
-
46
- def __call__(
47
- self,
48
- outputs,
49
- targets
50
- ):
51
- dtype = outputs[0].type()
52
- device = targets.device
53
- loss_cls, loss_obj, loss_iou, loss_l1 = torch.zeros(1, device=device), torch.zeros(1, device=device), \
54
- torch.zeros(1, device=device), torch.zeros(1, device=device)
55
- num_classes = outputs[0].shape[-1] - 5
56
-
57
- outputs, outputs_origin, gt_bboxes_scale, xy_shifts, expanded_strides = self.get_outputs_and_grids(
58
- outputs, self.strides, dtype, device)
59
-
60
- total_num_anchors = outputs.shape[1]
61
- bbox_preds = outputs[:, :, :4] # [batch, n_anchors_all, 4]
62
- bbox_preds_org = outputs_origin[:, :, :4] # [batch, n_anchors_all, 4]
63
- obj_preds = outputs[:, :, 4].unsqueeze(-1) # [batch, n_anchors_all, 1]
64
- cls_preds = outputs[:, :, 5:] # [batch, n_anchors_all, n_cls]
65
-
66
- # targets
67
- batch_size = bbox_preds.shape[0]
68
- targets_list = np.zeros((batch_size, 1, 5)).tolist()
69
- for i, item in enumerate(targets.cpu().numpy().tolist()):
70
- targets_list[int(item[0])].append(item[1:])
71
- max_len = max((len(l) for l in targets_list))
72
-
73
- targets = torch.from_numpy(np.array(list(map(lambda l:l + [[-1,0,0,0,0]]*(max_len - len(l)), targets_list)))[:,1:,:]).to(targets.device)
74
- num_targets_list = (targets.sum(dim=2) > 0).sum(dim=1) # number of objects
75
-
76
- num_fg, num_gts = 0, 0
77
- cls_targets, reg_targets, l1_targets, obj_targets, fg_masks = [], [], [], [], []
78
-
79
- for batch_idx in range(batch_size):
80
- num_gt = int(num_targets_list[batch_idx])
81
- num_gts += num_gt
82
- if num_gt == 0:
83
- cls_target = outputs.new_zeros((0, num_classes))
84
- reg_target = outputs.new_zeros((0, 4))
85
- l1_target = outputs.new_zeros((0, 4))
86
- obj_target = outputs.new_zeros((total_num_anchors, 1))
87
- fg_mask = outputs.new_zeros(total_num_anchors).bool()
88
- else:
89
-
90
- gt_bboxes_per_image = targets[batch_idx, :num_gt, 1:5].mul_(gt_bboxes_scale)
91
- gt_classes = targets[batch_idx, :num_gt, 0]
92
- bboxes_preds_per_image = bbox_preds[batch_idx]
93
- cls_preds_per_image = cls_preds[batch_idx]
94
- obj_preds_per_image = obj_preds[batch_idx]
95
-
96
- try:
97
- (
98
- gt_matched_classes,
99
- fg_mask,
100
- pred_ious_this_matching,
101
- matched_gt_inds,
102
- num_fg_img,
103
- ) = self.get_assignments(
104
- batch_idx,
105
- num_gt,
106
- total_num_anchors,
107
- gt_bboxes_per_image,
108
- gt_classes,
109
- bboxes_preds_per_image,
110
- cls_preds_per_image,
111
- obj_preds_per_image,
112
- expanded_strides,
113
- xy_shifts,
114
- num_classes
115
- )
116
-
117
- except RuntimeError:
118
- print(
119
- "OOM RuntimeError is raised due to the huge memory cost during label assignment. \
120
- CPU mode is applied in this batch. If you want to avoid this issue, \
121
- try to reduce the batch size or image size."
122
- )
123
- torch.cuda.empty_cache()
124
- print("------------CPU Mode for This Batch-------------")
125
-
126
- _gt_bboxes_per_image = gt_bboxes_per_image.cpu().float()
127
- _gt_classes = gt_classes.cpu().float()
128
- _bboxes_preds_per_image = bboxes_preds_per_image.cpu().float()
129
- _cls_preds_per_image = cls_preds_per_image.cpu().float()
130
- _obj_preds_per_image = obj_preds_per_image.cpu().float()
131
-
132
- _expanded_strides = expanded_strides.cpu().float()
133
- _xy_shifts = xy_shifts.cpu()
134
-
135
- (
136
- gt_matched_classes,
137
- fg_mask,
138
- pred_ious_this_matching,
139
- matched_gt_inds,
140
- num_fg_img,
141
- ) = self.get_assignments(
142
- batch_idx,
143
- num_gt,
144
- total_num_anchors,
145
- _gt_bboxes_per_image,
146
- _gt_classes,
147
- _bboxes_preds_per_image,
148
- _cls_preds_per_image,
149
- _obj_preds_per_image,
150
- _expanded_strides,
151
- _xy_shifts,
152
- num_classes
153
- )
154
-
155
- gt_matched_classes = gt_matched_classes.cuda()
156
- fg_mask = fg_mask.cuda()
157
- pred_ious_this_matching = pred_ious_this_matching.cuda()
158
- matched_gt_inds = matched_gt_inds.cuda()
159
-
160
- torch.cuda.empty_cache()
161
- num_fg += num_fg_img
162
- if num_fg_img > 0:
163
- cls_target = F.one_hot(
164
- gt_matched_classes.to(torch.int64), num_classes
165
- ) * pred_ious_this_matching.unsqueeze(-1)
166
- obj_target = fg_mask.unsqueeze(-1)
167
- reg_target = gt_bboxes_per_image[matched_gt_inds]
168
-
169
- l1_target = self.get_l1_target(
170
- outputs.new_zeros((num_fg_img, 4)),
171
- gt_bboxes_per_image[matched_gt_inds],
172
- expanded_strides[0][fg_mask],
173
- xy_shifts=xy_shifts[0][fg_mask],
174
- )
175
-
176
- cls_targets.append(cls_target)
177
- reg_targets.append(reg_target)
178
- obj_targets.append(obj_target)
179
- l1_targets.append(l1_target)
180
- fg_masks.append(fg_mask)
181
-
182
- cls_targets = torch.cat(cls_targets, 0)
183
- reg_targets = torch.cat(reg_targets, 0)
184
- obj_targets = torch.cat(obj_targets, 0)
185
- l1_targets = torch.cat(l1_targets, 0)
186
- fg_masks = torch.cat(fg_masks, 0)
187
-
188
- num_fg = max(num_fg, 1)
189
- # loss
190
- loss_iou += (self.iou_loss(bbox_preds.view(-1, 4)[fg_masks].T, reg_targets)).sum() / num_fg
191
- loss_l1 += (self.l1_loss(bbox_preds_org.view(-1, 4)[fg_masks], l1_targets)).sum() / num_fg
192
-
193
- loss_obj += (self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets*1.0)).sum() / num_fg
194
- loss_cls += (self.bcewithlog_loss(cls_preds.view(-1, num_classes)[fg_masks], cls_targets)).sum() / num_fg
195
-
196
- total_losses = self.reg_weight * loss_iou + loss_l1 + loss_obj + loss_cls
197
- return total_losses, torch.cat((self.reg_weight * loss_iou, loss_l1, loss_obj, loss_cls)).detach()
198
-
199
- def decode_output(self, output, k, stride, dtype, device):
200
- grid = self.grids[k].to(device)
201
- batch_size = output.shape[0]
202
- hsize, wsize = output.shape[2:4]
203
- if grid.shape[2:4] != output.shape[2:4]:
204
- yv, xv = torch.meshgrid([torch.arange(hsize), torch.arange(wsize)])
205
- grid = torch.stack((xv, yv), 2).view(1, 1, hsize, wsize, 2).type(dtype).to(device)
206
- self.grids[k] = grid
207
-
208
- output = output.reshape(batch_size, self.n_anchors * hsize * wsize, -1)
209
- output_origin = output.clone()
210
- grid = grid.view(1, -1, 2)
211
-
212
- output[..., :2] = (output[..., :2] + grid) * stride
213
- output[..., 2:4] = torch.exp(output[..., 2:4]) * stride
214
-
215
- return output, output_origin, grid, hsize, wsize
216
-
217
- def get_outputs_and_grids(self, outputs, strides, dtype, device):
218
- xy_shifts = []
219
- expanded_strides = []
220
- outputs_new = []
221
- outputs_origin = []
222
-
223
- for k, output in enumerate(outputs):
224
- output, output_origin, grid, feat_h, feat_w = self.decode_output(
225
- output, k, strides[k], dtype, device)
226
-
227
- xy_shift = grid
228
- expanded_stride = torch.full((1, grid.shape[1], 1), strides[k], dtype=grid.dtype, device=grid.device)
229
-
230
- xy_shifts.append(xy_shift)
231
- expanded_strides.append(expanded_stride)
232
- outputs_new.append(output)
233
- outputs_origin.append(output_origin)
234
-
235
- xy_shifts = torch.cat(xy_shifts, 1) # [1, n_anchors_all, 2]
236
- expanded_strides = torch.cat(expanded_strides, 1) # [1, n_anchors_all, 1]
237
- outputs_origin = torch.cat(outputs_origin, 1)
238
- outputs = torch.cat(outputs_new, 1)
239
-
240
- feat_h *= strides[-1]
241
- feat_w *= strides[-1]
242
- gt_bboxes_scale = torch.Tensor([[feat_w, feat_h, feat_w, feat_h]]).type_as(outputs)
243
-
244
- return outputs, outputs_origin, gt_bboxes_scale, xy_shifts, expanded_strides
245
-
246
- def get_l1_target(self, l1_target, gt, stride, xy_shifts, eps=1e-8):
247
-
248
- l1_target[:, 0:2] = gt[:, 0:2] / stride - xy_shifts
249
- l1_target[:, 2:4] = torch.log(gt[:, 2:4] / stride + eps)
250
- return l1_target
251
-
252
- @torch.no_grad()
253
- def get_assignments(
254
- self,
255
- batch_idx,
256
- num_gt,
257
- total_num_anchors,
258
- gt_bboxes_per_image,
259
- gt_classes,
260
- bboxes_preds_per_image,
261
- cls_preds_per_image,
262
- obj_preds_per_image,
263
- expanded_strides,
264
- xy_shifts,
265
- num_classes
266
- ):
267
-
268
- fg_mask, is_in_boxes_and_center = self.get_in_boxes_info(
269
- gt_bboxes_per_image,
270
- expanded_strides,
271
- xy_shifts,
272
- total_num_anchors,
273
- num_gt,
274
- )
275
-
276
- bboxes_preds_per_image = bboxes_preds_per_image[fg_mask]
277
- cls_preds_ = cls_preds_per_image[fg_mask]
278
- obj_preds_ = obj_preds_per_image[fg_mask]
279
- num_in_boxes_anchor = bboxes_preds_per_image.shape[0]
280
-
281
- # cost
282
- pair_wise_ious = pairwise_bbox_iou(gt_bboxes_per_image, bboxes_preds_per_image, box_format='xywh')
283
- pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8)
284
-
285
- gt_cls_per_image = (
286
- F.one_hot(gt_classes.to(torch.int64), num_classes)
287
- .float()
288
- .unsqueeze(1)
289
- .repeat(1, num_in_boxes_anchor, 1)
290
- )
291
-
292
- with torch.cuda.amp.autocast(enabled=False):
293
- cls_preds_ = (
294
- cls_preds_.float().sigmoid_().unsqueeze(0).repeat(num_gt, 1, 1)
295
- * obj_preds_.float().sigmoid_().unsqueeze(0).repeat(num_gt, 1, 1)
296
- )
297
- pair_wise_cls_loss = F.binary_cross_entropy(
298
- cls_preds_.sqrt_(), gt_cls_per_image, reduction="none"
299
- ).sum(-1)
300
- del cls_preds_, obj_preds_
301
-
302
- cost = (
303
- self.cls_weight * pair_wise_cls_loss
304
- + self.iou_weight * pair_wise_ious_loss
305
- + 100000.0 * (~is_in_boxes_and_center)
306
- )
307
-
308
- (
309
- num_fg,
310
- gt_matched_classes,
311
- pred_ious_this_matching,
312
- matched_gt_inds,
313
- ) = self.dynamic_k_matching(cost, pair_wise_ious, gt_classes, num_gt, fg_mask)
314
-
315
- del pair_wise_cls_loss, cost, pair_wise_ious, pair_wise_ious_loss
316
-
317
- return (
318
- gt_matched_classes,
319
- fg_mask,
320
- pred_ious_this_matching,
321
- matched_gt_inds,
322
- num_fg,
323
- )
324
-
325
- def get_in_boxes_info(
326
- self,
327
- gt_bboxes_per_image,
328
- expanded_strides,
329
- xy_shifts,
330
- total_num_anchors,
331
- num_gt,
332
- ):
333
- expanded_strides_per_image = expanded_strides[0]
334
- xy_shifts_per_image = xy_shifts[0] * expanded_strides_per_image
335
- xy_centers_per_image = (
336
- (xy_shifts_per_image + 0.5 * expanded_strides_per_image)
337
- .unsqueeze(0)
338
- .repeat(num_gt, 1, 1)
339
- ) # [n_anchor, 2] -> [n_gt, n_anchor, 2]
340
-
341
- gt_bboxes_per_image_lt = (
342
- (gt_bboxes_per_image[:, 0:2] - 0.5 * gt_bboxes_per_image[:, 2:4])
343
- .unsqueeze(1)
344
- .repeat(1, total_num_anchors, 1)
345
- )
346
- gt_bboxes_per_image_rb = (
347
- (gt_bboxes_per_image[:, 0:2] + 0.5 * gt_bboxes_per_image[:, 2:4])
348
- .unsqueeze(1)
349
- .repeat(1, total_num_anchors, 1)
350
- ) # [n_gt, 2] -> [n_gt, n_anchor, 2]
351
-
352
- b_lt = xy_centers_per_image - gt_bboxes_per_image_lt
353
- b_rb = gt_bboxes_per_image_rb - xy_centers_per_image
354
- bbox_deltas = torch.cat([b_lt, b_rb], 2)
355
-
356
- is_in_boxes = bbox_deltas.min(dim=-1).values > 0.0
357
- is_in_boxes_all = is_in_boxes.sum(dim=0) > 0
358
-
359
- # in fixed center
360
- gt_bboxes_per_image_lt = (gt_bboxes_per_image[:, 0:2]).unsqueeze(1).repeat(
361
- 1, total_num_anchors, 1
362
- ) - self.center_radius * expanded_strides_per_image.unsqueeze(0)
363
- gt_bboxes_per_image_rb = (gt_bboxes_per_image[:, 0:2]).unsqueeze(1).repeat(
364
- 1, total_num_anchors, 1
365
- ) + self.center_radius * expanded_strides_per_image.unsqueeze(0)
366
-
367
- c_lt = xy_centers_per_image - gt_bboxes_per_image_lt
368
- c_rb = gt_bboxes_per_image_rb - xy_centers_per_image
369
- center_deltas = torch.cat([c_lt, c_rb], 2)
370
- is_in_centers = center_deltas.min(dim=-1).values > 0.0
371
- is_in_centers_all = is_in_centers.sum(dim=0) > 0
372
-
373
- # in boxes and in centers
374
- is_in_boxes_anchor = is_in_boxes_all | is_in_centers_all
375
-
376
- is_in_boxes_and_center = (
377
- is_in_boxes[:, is_in_boxes_anchor] & is_in_centers[:, is_in_boxes_anchor]
378
- )
379
- return is_in_boxes_anchor, is_in_boxes_and_center
380
-
381
- def dynamic_k_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask):
382
- matching_matrix = torch.zeros_like(cost, dtype=torch.uint8)
383
- ious_in_boxes_matrix = pair_wise_ious
384
- n_candidate_k = min(10, ious_in_boxes_matrix.size(1))
385
- topk_ious, _ = torch.topk(ious_in_boxes_matrix, n_candidate_k, dim=1)
386
- dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1)
387
- dynamic_ks = dynamic_ks.tolist()
388
-
389
- for gt_idx in range(num_gt):
390
- _, pos_idx = torch.topk(
391
- cost[gt_idx], k=dynamic_ks[gt_idx], largest=False
392
- )
393
- matching_matrix[gt_idx][pos_idx] = 1
394
- del topk_ious, dynamic_ks, pos_idx
395
-
396
- anchor_matching_gt = matching_matrix.sum(0)
397
- if (anchor_matching_gt > 1).sum() > 0:
398
- _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0)
399
- matching_matrix[:, anchor_matching_gt > 1] *= 0
400
- matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1
401
- fg_mask_inboxes = matching_matrix.sum(0) > 0
402
- num_fg = fg_mask_inboxes.sum().item()
403
- fg_mask[fg_mask.clone()] = fg_mask_inboxes
404
- matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0)
405
- gt_matched_classes = gt_classes[matched_gt_inds]
406
-
407
- pred_ious_this_matching = (matching_matrix * pair_wise_ious).sum(0)[
408
- fg_mask_inboxes
409
- ]
410
-
411
- return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds
 
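The loss above assigns targets with SimOTA. A self-contained numeric sketch of the dynamic-k rule used in dynamic_k_matching: each ground truth gets k = clamp(int(sum of its top-10 candidate IoUs), min=1) anchors. The IoU values here are made up:

```python
import torch

pair_wise_ious = torch.tensor([[0.62, 0.55, 0.10, 0.05],    # IoUs of GT 0 with four candidate anchors
                               [0.20, 0.15, 0.70, 0.65]])   # IoUs of GT 1
n_candidate_k = min(10, pair_wise_ious.size(1))
topk_ious, _ = torch.topk(pair_wise_ious, n_candidate_k, dim=1)
dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1)
print(dynamic_ks.tolist())   # [1, 1]: each GT keeps only its single lowest-cost anchor in this toy case
```
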
 
yolov6/models/reppan.py DELETED
@@ -1,108 +0,0 @@
1
- import torch
2
- from torch import nn
3
- from yolov6.layers.common import RepBlock, SimConv, Transpose
4
-
5
-
6
- class RepPANNeck(nn.Module):
7
- """RepPANNeck Module
8
- EfficientRep is the default backbone of this model.
9
- RepPANNeck balances feature fusion ability and hardware efficiency.
10
- """
11
-
12
- def __init__(
13
- self,
14
- channels_list=None,
15
- num_repeats=None
16
- ):
17
- super().__init__()
18
-
19
- assert channels_list is not None
20
- assert num_repeats is not None
21
-
22
- self.Rep_p4 = RepBlock(
23
- in_channels=channels_list[3] + channels_list[5],
24
- out_channels=channels_list[5],
25
- n=num_repeats[5],
26
- )
27
-
28
- self.Rep_p3 = RepBlock(
29
- in_channels=channels_list[2] + channels_list[6],
30
- out_channels=channels_list[6],
31
- n=num_repeats[6]
32
- )
33
-
34
- self.Rep_n3 = RepBlock(
35
- in_channels=channels_list[6] + channels_list[7],
36
- out_channels=channels_list[8],
37
- n=num_repeats[7],
38
- )
39
-
40
- self.Rep_n4 = RepBlock(
41
- in_channels=channels_list[5] + channels_list[9],
42
- out_channels=channels_list[10],
43
- n=num_repeats[8]
44
- )
45
-
46
- self.reduce_layer0 = SimConv(
47
- in_channels=channels_list[4],
48
- out_channels=channels_list[5],
49
- kernel_size=1,
50
- stride=1
51
- )
52
-
53
- self.upsample0 = Transpose(
54
- in_channels=channels_list[5],
55
- out_channels=channels_list[5],
56
- )
57
-
58
- self.reduce_layer1 = SimConv(
59
- in_channels=channels_list[5],
60
- out_channels=channels_list[6],
61
- kernel_size=1,
62
- stride=1
63
- )
64
-
65
- self.upsample1 = Transpose(
66
- in_channels=channels_list[6],
67
- out_channels=channels_list[6]
68
- )
69
-
70
- self.downsample2 = SimConv(
71
- in_channels=channels_list[6],
72
- out_channels=channels_list[7],
73
- kernel_size=3,
74
- stride=2
75
- )
76
-
77
- self.downsample1 = SimConv(
78
- in_channels=channels_list[8],
79
- out_channels=channels_list[9],
80
- kernel_size=3,
81
- stride=2
82
- )
83
-
84
- def forward(self, input):
85
-
86
- (x2, x1, x0) = input
87
-
88
- fpn_out0 = self.reduce_layer0(x0)
89
- upsample_feat0 = self.upsample0(fpn_out0)
90
- f_concat_layer0 = torch.cat([upsample_feat0, x1], 1)
91
- f_out0 = self.Rep_p4(f_concat_layer0)
92
-
93
- fpn_out1 = self.reduce_layer1(f_out0)
94
- upsample_feat1 = self.upsample1(fpn_out1)
95
- f_concat_layer1 = torch.cat([upsample_feat1, x2], 1)
96
- pan_out2 = self.Rep_p3(f_concat_layer1)
97
-
98
- down_feat1 = self.downsample2(pan_out2)
99
- p_concat_layer1 = torch.cat([down_feat1, fpn_out1], 1)
100
- pan_out1 = self.Rep_n3(p_concat_layer1)
101
-
102
- down_feat0 = self.downsample1(pan_out1)
103
- p_concat_layer2 = torch.cat([down_feat0, fpn_out0], 1)
104
- pan_out0 = self.Rep_n4(p_concat_layer2)
105
-
106
- outputs = [pan_out2, pan_out1, pan_out0]
107
-
108
- return outputs
 
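A hedged forward-pass sketch for the neck deleted above, assuming a matching YOLOv6 checkout is importable. The width and depth lists are illustrative and consistent with the head sketch earlier; the inputs are the three backbone features, shallowest first:

```python
import torch
from yolov6.models.reppan import RepPANNeck

channels_list = [64, 128, 256, 512, 1024, 256, 128, 128, 256, 256, 512]   # illustrative widths
num_repeats = [1, 6, 12, 18, 6, 12, 12, 12, 12]                           # illustrative depths
neck = RepPANNeck(channels_list=channels_list, num_repeats=num_repeats)

x2 = torch.randn(1, 256, 80, 80)      # stride-8 backbone feature  (channels_list[2])
x1 = torch.randn(1, 512, 40, 40)      # stride-16 backbone feature (channels_list[3])
x0 = torch.randn(1, 1024, 20, 20)     # stride-32 backbone feature (channels_list[4])
p3, p4, p5 = neck((x2, x1, x0))
print(p3.shape, p4.shape, p5.shape)   # (1, 128, 80, 80) (1, 256, 40, 40) (1, 512, 20, 20)
```
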
 
yolov6/models/yolo.py DELETED
@@ -1,83 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import math
4
- import torch.nn as nn
5
- from yolov6.layers.common import *
6
- from yolov6.utils.torch_utils import initialize_weights
7
- from yolov6.models.efficientrep import EfficientRep
8
- from yolov6.models.reppan import RepPANNeck
9
- from yolov6.models.effidehead import Detect, build_effidehead_layer
10
-
11
-
12
- class Model(nn.Module):
13
- '''YOLOv6 model with backbone, neck and head.
14
- The default parts are EfficientRep Backbone, Rep-PAN and
15
- Efficient Decoupled Head.
16
- '''
17
- def __init__(self, config, channels=3, num_classes=None, anchors=None): # model, input channels, number of classes
18
- super().__init__()
19
- # Build network
20
- num_layers = config.model.head.num_layers
21
- self.backbone, self.neck, self.detect = build_network(config, channels, num_classes, anchors, num_layers)
22
-
23
- # Init Detect head
24
- begin_indices = config.model.head.begin_indices
25
- out_indices_head = config.model.head.out_indices
26
- self.stride = self.detect.stride
27
- self.detect.i = begin_indices
28
- self.detect.f = out_indices_head
29
- self.detect.initialize_biases()
30
-
31
- # Init weights
32
- initialize_weights(self)
33
-
34
- def forward(self, x):
35
- x = self.backbone(x)
36
- x = self.neck(x)
37
- x = self.detect(x)
38
- return x
39
-
40
- def _apply(self, fn):
41
- self = super()._apply(fn)
42
- self.detect.stride = fn(self.detect.stride)
43
- self.detect.grid = list(map(fn, self.detect.grid))
44
- return self
45
-
46
-
47
- def make_divisible(x, divisor):
48
- # Round the value x up so that it is evenly divisible by the divisor.
49
- return math.ceil(x / divisor) * divisor
50
-
51
-
52
- def build_network(config, channels, num_classes, anchors, num_layers):
53
- depth_mul = config.model.depth_multiple
54
- width_mul = config.model.width_multiple
55
- num_repeat_backbone = config.model.backbone.num_repeats
56
- channels_list_backbone = config.model.backbone.out_channels
57
- num_repeat_neck = config.model.neck.num_repeats
58
- channels_list_neck = config.model.neck.out_channels
59
- num_anchors = config.model.head.anchors
60
- num_repeat = [(max(round(i * depth_mul), 1) if i > 1 else i) for i in (num_repeat_backbone + num_repeat_neck)]
61
- channels_list = [make_divisible(i * width_mul, 8) for i in (channels_list_backbone + channels_list_neck)]
62
-
63
- backbone = EfficientRep(
64
- in_channels=channels,
65
- channels_list=channels_list,
66
- num_repeats=num_repeat
67
- )
68
-
69
- neck = RepPANNeck(
70
- channels_list=channels_list,
71
- num_repeats=num_repeat
72
- )
73
-
74
- head_layers = build_effidehead_layer(channels_list, num_anchors, num_classes)
75
-
76
- head = Detect(num_classes, anchors, num_layers, head_layers=head_layers)
77
-
78
- return backbone, neck, head
79
-
80
-
81
- def build_model(cfg, num_classes, device):
82
- model = Model(cfg, channels=3, num_classes=num_classes, anchors=cfg.model.head.anchors).to(device)
83
- return model
 
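A hedged end-to-end sketch using build_model from the file above. It assumes a matching YOLOv6 checkout is importable and that a YOLOv6-style python config exists at the hypothetical path below:

```python
import torch
from yolov6.utils.config import Config
from yolov6.models.yolo import build_model

cfg = Config.fromfile('configs/yolov6s.py')                      # hypothetical config path
model = build_model(cfg, num_classes=80, device=torch.device('cpu'))
model.eval()
with torch.no_grad():
    preds = model(torch.randn(1, 3, 640, 640))
print(preds.shape)                                               # (1, n_anchors_total, 85)
```
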
 
yolov6/solver/build.py DELETED
@@ -1,42 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- import math
5
-
6
- import torch
7
- import torch.nn as nn
8
-
9
-
10
- def build_optimizer(cfg, model):
11
- """ Build optimizer from cfg file."""
12
- g_bnw, g_w, g_b = [], [], []
13
- for v in model.modules():
14
- if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
15
- g_b.append(v.bias)
16
- if isinstance(v, nn.BatchNorm2d):
17
- g_bnw.append(v.weight)
18
- elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
19
- g_w.append(v.weight)
20
-
21
- assert cfg.solver.optim in ('SGD', 'Adam'), 'ERROR: unknown optimizer, use SGD defaulted'
22
- if cfg.solver.optim == 'SGD':
23
- optimizer = torch.optim.SGD(g_bnw, lr=cfg.solver.lr0, momentum=cfg.solver.momentum, nesterov=True)
24
- elif cfg.solver.optim == 'Adam':
25
- optimizer = torch.optim.Adam(g_bnw, lr=cfg.solver.lr0, betas=(cfg.solver.momentum, 0.999))
26
-
27
- optimizer.add_param_group({'params': g_w, 'weight_decay': cfg.solver.weight_decay})
28
- optimizer.add_param_group({'params': g_b})
29
-
30
- del g_bnw, g_w, g_b
31
- return optimizer
32
-
33
-
34
- def build_lr_scheduler(cfg, optimizer, epochs):
35
- """Build learning rate scheduler from cfg file."""
36
- if cfg.solver.lr_scheduler == 'Cosine':
37
- lf = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (cfg.solver.lrf - 1) + 1
38
- else:
39
- LOGGER.error('unknown lr scheduler, use Cosine defaulted')
40
-
41
- scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
42
- return scheduler, lf
 
 
yolov6/utils/Arial.ttf DELETED
Binary file (773 kB)
 
yolov6/utils/checkpoint.py DELETED
@@ -1,60 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- import shutil
5
- import torch
6
- import os.path as osp
7
- from yolov6.utils.events import LOGGER
8
- from yolov6.utils.torch_utils import fuse_model
9
-
10
-
11
- def load_state_dict(weights, model, map_location=None):
12
- """Load weights from a checkpoint file, only assigning weights to layers whose name and shape match."""
13
- ckpt = torch.load(weights, map_location=map_location)
14
- state_dict = ckpt['model'].float().state_dict()
15
- model_state_dict = model.state_dict()
16
- state_dict = {k: v for k, v in state_dict.items() if k in model_state_dict and v.shape == model_state_dict[k].shape}
17
- model.load_state_dict(state_dict, strict=False)
18
- del ckpt, state_dict, model_state_dict
19
- return model
20
-
21
-
22
- def load_checkpoint(weights, map_location=None, inplace=True, fuse=True):
23
- """Load model from checkpoint file."""
24
- LOGGER.info("Loading checkpoint from {}".format(weights))
25
- ckpt = torch.load(weights, map_location=map_location) # load
26
- model = ckpt['ema' if ckpt.get('ema') else 'model'].float()
27
- if fuse:
28
- LOGGER.info("\nFusing model...")
29
- model = fuse_model(model).eval()
30
- else:
31
- model = model.eval()
32
- return model
33
-
34
-
35
- def save_checkpoint(ckpt, is_best, save_dir, model_name=""):
36
- """ Save checkpoint to the disk."""
37
- if not osp.exists(save_dir):
38
- os.makedirs(save_dir)
39
- filename = osp.join(save_dir, model_name + '.pt')
40
- torch.save(ckpt, filename)
41
- if is_best:
42
- best_filename = osp.join(save_dir, 'best_ckpt.pt')
43
- shutil.copyfile(filename, best_filename)
44
-
45
-
46
- def strip_optimizer(ckpt_dir, epoch):
47
- for s in ['best', 'last']:
48
- ckpt_path = osp.join(ckpt_dir, '{}_ckpt.pt'.format(s))
49
- if not osp.exists(ckpt_path):
50
- continue
51
- ckpt = torch.load(ckpt_path, map_location=torch.device('cpu'))
52
- if ckpt.get('ema'):
53
- ckpt['model'] = ckpt['ema'] # replace model with ema
54
- for k in ['optimizer', 'ema', 'updates']: # keys
55
- ckpt[k] = None
56
- ckpt['epoch'] = epoch
57
- ckpt['model'].half() # to FP16
58
- for p in ckpt['model'].parameters():
59
- p.requires_grad = False
60
- torch.save(ckpt, ckpt_path)
 
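A hedged loading sketch for the helpers above, assuming a matching YOLOv6 checkout is importable and that a checkpoint exists at the hypothetical path below:

```python
from yolov6.utils.checkpoint import load_checkpoint

# Prefers the EMA weights if present, optionally fuses conv+BN, and returns the model in eval mode.
model = load_checkpoint('weights/yolov6s.pt', map_location='cpu', fuse=True)   # hypothetical path
```
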
 
yolov6/utils/config.py DELETED
@@ -1,101 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- # The code is based on
4
- # https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
5
- # Copyright (c) OpenMMLab.
6
-
7
- import os.path as osp
8
- import shutil
9
- import sys
10
- import tempfile
11
- from importlib import import_module
12
- from addict import Dict
13
-
14
-
15
- class ConfigDict(Dict):
16
-
17
- def __missing__(self, name):
18
- raise KeyError(name)
19
-
20
- def __getattr__(self, name):
21
- try:
22
- value = super(ConfigDict, self).__getattr__(name)
23
- except KeyError:
24
- ex = AttributeError("'{}' object has no attribute '{}'".format(
25
- self.__class__.__name__, name))
26
- except Exception as e:
27
- ex = e
28
- else:
29
- return value
30
- raise ex
31
-
32
-
33
- class Config(object):
34
-
35
- @staticmethod
36
- def _file2dict(filename):
37
- filename = str(filename)
38
- if filename.endswith('.py'):
39
- with tempfile.TemporaryDirectory() as temp_config_dir:
40
- shutil.copyfile(filename,
41
- osp.join(temp_config_dir, '_tempconfig.py'))
42
- sys.path.insert(0, temp_config_dir)
43
- mod = import_module('_tempconfig')
44
- sys.path.pop(0)
45
- cfg_dict = {
46
- name: value
47
- for name, value in mod.__dict__.items()
48
- if not name.startswith('__')
49
- }
50
- # delete imported module
51
- del sys.modules['_tempconfig']
52
- else:
53
- raise IOError('Only .py type are supported now!')
54
- cfg_text = filename + '\n'
55
- with open(filename, 'r') as f:
56
- cfg_text += f.read()
57
-
58
- return cfg_dict, cfg_text
59
-
60
- @staticmethod
61
- def fromfile(filename):
62
- cfg_dict, cfg_text = Config._file2dict(filename)
63
- return Config(cfg_dict, cfg_text=cfg_text, filename=filename)
64
-
65
- def __init__(self, cfg_dict=None, cfg_text=None, filename=None):
66
- if cfg_dict is None:
67
- cfg_dict = dict()
68
- elif not isinstance(cfg_dict, dict):
69
- raise TypeError('cfg_dict must be a dict, but got {}'.format(
70
- type(cfg_dict)))
71
-
72
- super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict))
73
- super(Config, self).__setattr__('_filename', filename)
74
- if cfg_text:
75
- text = cfg_text
76
- elif filename:
77
- with open(filename, 'r') as f:
78
- text = f.read()
79
- else:
80
- text = ''
81
- super(Config, self).__setattr__('_text', text)
82
-
83
- @property
84
- def filename(self):
85
- return self._filename
86
-
87
- @property
88
- def text(self):
89
- return self._text
90
-
91
- def __repr__(self):
92
- return 'Config (path: {}): {}'.format(self.filename,
93
- self._cfg_dict.__repr__())
94
-
95
- def __getattr__(self, name):
96
- return getattr(self._cfg_dict, name)
97
-
98
- def __setattr__(self, name, value):
99
- if isinstance(value, dict):
100
- value = ConfigDict(value)
101
- self._cfg_dict.__setattr__(name, value)
 
 
yolov6/utils/ema.py DELETED
@@ -1,59 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- # The code is based on
4
- # https://github.com/ultralytics/yolov5/blob/master/utils/torch_utils.py
5
- import math
6
- from copy import deepcopy
7
- import torch
8
- import torch.nn as nn
9
-
10
-
11
- class ModelEMA:
12
- """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
13
- Keep a moving average of everything in the model state_dict (parameters and buffers).
14
- This is intended to allow functionality like
15
- https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
16
- A smoothed version of the weights is necessary for some training schemes to perform well.
17
- This class is sensitive where it is initialized in the sequence of model init,
18
- GPU assignment and distributed training wrappers.
19
- """
20
-
21
- def __init__(self, model, decay=0.9999, updates=0):
22
- self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA
23
- self.updates = updates
24
- self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
25
- for param in self.ema.parameters():
26
- param.requires_grad_(False)
27
-
28
- def update(self, model):
29
- with torch.no_grad():
30
- self.updates += 1
31
- decay = self.decay(self.updates)
32
-
33
- state_dict = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict
34
- for k, item in self.ema.state_dict().items():
35
- if item.dtype.is_floating_point:
36
- item *= decay
37
- item += (1 - decay) * state_dict[k].detach()
38
-
39
- def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
40
- copy_attr(self.ema, model, include, exclude)
41
-
42
-
43
- def copy_attr(a, b, include=(), exclude=()):
44
- """Copy attributes from one instance and set them to another instance."""
45
- for k, item in b.__dict__.items():
46
- if (len(include) and k not in include) or k.startswith('_') or k in exclude:
47
- continue
48
- else:
49
- setattr(a, k, item)
50
-
51
-
52
- def is_parallel(model):
53
- # Return True if model's type is DP or DDP, else False.
54
- return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
55
-
56
-
57
- def de_parallel(model):
58
- # De-parallelize a model. Return single-GPU model if model's type is DP or DDP.
59
- return model.module if is_parallel(model) else model
 
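A hedged training-loop sketch for ModelEMA, assuming a matching YOLOv6 checkout is importable; the tiny Sequential stands in for the real detector:

```python
import torch.nn as nn
from yolov6.utils.ema import ModelEMA

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())   # toy stand-in model
ema = ModelEMA(model, decay=0.9999)

# Inside the training loop, after every optimizer step:
ema.update(model)            # move the shadow weights toward the live model
# At validation or checkpoint time, evaluate ema.ema instead of model.
```
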
 
yolov6/utils/envs.py DELETED
@@ -1,54 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- import os
4
- import random
5
- import numpy as np
6
-
7
- import torch
8
- import torch.backends.cudnn as cudnn
9
- from yolov6.utils.events import LOGGER
10
-
11
-
12
- def get_envs():
13
- """Get the PyTorch distributed-training settings from the system environment variables."""
14
- local_rank = int(os.getenv('LOCAL_RANK', -1))
15
- rank = int(os.getenv('RANK', -1))
16
- world_size = int(os.getenv('WORLD_SIZE', 1))
17
- return local_rank, rank, world_size
18
-
19
-
20
- def select_device(device):
21
- """Set devices' information to the program.
22
- Args:
23
- device: a string, like 'cpu' or '1,2,3,4'
24
- Returns:
25
- torch.device
26
- """
27
- if device == 'cpu':
28
- os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
29
- LOGGER.info('Using CPU for training... ')
30
- elif device:
31
- os.environ['CUDA_VISIBLE_DEVICES'] = device
32
- assert torch.cuda.is_available()
33
- nd = len(device.strip().split(','))
34
- LOGGER.info(f'Using {nd} GPU for training... ')
35
- cuda = device != 'cpu' and torch.cuda.is_available()
36
- device = torch.device('cuda:0' if cuda else 'cpu')
37
- return device
38
-
39
-
40
- def set_random_seed(seed, deterministic=False):
41
- """ Set the random seed for the random library, numpy, torch and cudnn.
42
- Args:
43
- seed: int value.
44
- deterministic: bool value.
45
- """
46
- random.seed(seed)
47
- np.random.seed(seed)
48
- torch.manual_seed(seed)
49
- if deterministic:
50
- cudnn.deterministic = True
51
- cudnn.benchmark = False
52
- else:
53
- cudnn.deterministic = False
54
- cudnn.benchmark = True
 
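A hedged sketch of the environment helpers above (importable from a matching YOLOv6 checkout); 'cpu' keeps the example runnable without a GPU:

```python
from yolov6.utils.envs import get_envs, select_device, set_random_seed

local_rank, rank, world_size = get_envs()   # -1, -1, 1 outside of torch.distributed launches
device = select_device('cpu')               # or e.g. '0' / '0,1' when CUDA is available
set_random_seed(1, deterministic=True)      # reproducible runs, at some speed cost
print(local_rank, rank, world_size, device)
```
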
 
yolov6/utils/events.py DELETED
@@ -1,41 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- import os
4
- import yaml
5
- import logging
6
- import shutil
7
-
8
-
9
- def set_logging(name=None):
10
- rank = int(os.getenv('RANK', -1))
11
- logging.basicConfig(format="%(message)s", level=logging.INFO if (rank in (-1, 0)) else logging.WARNING)
12
- return logging.getLogger(name)
13
-
14
-
15
- LOGGER = set_logging(__name__)
16
- NCOLS = shutil.get_terminal_size().columns
17
-
18
-
19
- def load_yaml(file_path):
20
- """Load data from yaml file."""
21
- if isinstance(file_path, str):
22
- with open(file_path, errors='ignore') as f:
23
- data_dict = yaml.safe_load(f)
24
- return data_dict
25
-
26
-
27
- def save_yaml(data_dict, save_path):
28
- """Save data to yaml file"""
29
- with open(save_path, 'w') as f:
30
- yaml.safe_dump(data_dict, f, sort_keys=False)
31
-
32
-
33
- def write_tblog(tblogger, epoch, results, losses):
34
- """Display mAP and loss information to log."""
35
- tblogger.add_scalar("val/mAP@0.5", results[0], epoch + 1)
36
- tblogger.add_scalar("val/mAP@0.50:0.95", results[1], epoch + 1)
37
-
38
- tblogger.add_scalar("train/iou_loss", losses[0], epoch + 1)
39
- tblogger.add_scalar("train/l1_loss", losses[1], epoch + 1)
40
- tblogger.add_scalar("train/obj_loss", losses[2], epoch + 1)
41
- tblogger.add_scalar("train/cls_loss", losses[3], epoch + 1)
 
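A round-trip sketch for the YAML helpers above, assuming they are importable from a matching YOLOv6 checkout; the file name is hypothetical:

```python
from yolov6.utils.events import save_yaml, load_yaml

data = {'nc': 2, 'names': ['person', 'car']}
save_yaml(data, 'data_example.yaml')        # hypothetical output path
print(load_yaml('data_example.yaml'))       # {'nc': 2, 'names': ['person', 'car']}
```
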
 
yolov6/utils/figure_iou.py DELETED
@@ -1,114 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import math
4
- import torch
5
-
6
-
7
- class IOUloss:
8
- """ Calculate IoU loss.
9
- """
10
- def __init__(self, box_format='xywh', iou_type='ciou', reduction='none', eps=1e-7):
11
- """ Setting of the class.
12
- Args:
13
- box_format: (string), must be one of 'xywh' or 'xyxy'.
14
- iou_type: (string), can be one of 'ciou', 'diou', 'giou' or 'siou'
15
- reduction: (string), specifies the reduction to apply to the output, must be one of 'none', 'mean','sum'.
16
- eps: (float), a value to avoid divide by zero error.
17
- """
18
- self.box_format = box_format
19
- self.iou_type = iou_type.lower()
20
- self.reduction = reduction
21
- self.eps = eps
22
-
23
- def __call__(self, box1, box2):
24
- """ Calculate IoU. box1 and box2 are torch tensors with shape [M, 4] and [N, 4].
25
- """
26
- box2 = box2.T
27
- if self.box_format == 'xyxy':
28
- b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
29
- b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
30
- elif self.box_format == 'xywh':
31
- b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
32
- b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
33
- b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
34
- b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
35
-
36
- # Intersection area
37
- inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
38
- (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
39
-
40
- # Union Area
41
- w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + self.eps
42
- w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + self.eps
43
- union = w1 * h1 + w2 * h2 - inter + self.eps
44
- iou = inter / union
45
-
46
- cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex width
47
- ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height
48
- if self.iou_type == 'giou':
49
- c_area = cw * ch + self.eps # convex area
50
- iou = iou - (c_area - union) / c_area
51
- elif self.iou_type in ['diou', 'ciou']:
52
- c2 = cw ** 2 + ch ** 2 + self.eps # convex diagonal squared
53
- rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
54
- (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared
55
- if self.iou_type == 'diou':
56
- iou = iou - rho2 / c2
57
- elif self.iou_type == 'ciou':
58
- v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
59
- with torch.no_grad():
60
- alpha = v / (v - iou + (1 + self.eps))
61
- iou = iou - (rho2 / c2 + v * alpha)
62
- elif self.iou_type == 'siou':
63
- # SIoU Loss https://arxiv.org/pdf/2205.12740.pdf
64
- s_cw = (b2_x1 + b2_x2 - b1_x1 - b1_x2) * 0.5
65
- s_ch = (b2_y1 + b2_y2 - b1_y1 - b1_y2) * 0.5
66
- sigma = torch.pow(s_cw ** 2 + s_ch ** 2, 0.5)
67
- sin_alpha_1 = torch.abs(s_cw) / sigma
68
- sin_alpha_2 = torch.abs(s_ch) / sigma
69
- threshold = pow(2, 0.5) / 2
70
- sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1)
71
- angle_cost = torch.cos(torch.arcsin(sin_alpha) * 2 - math.pi / 2)
72
- rho_x = (s_cw / cw) ** 2
73
- rho_y = (s_ch / ch) ** 2
74
- gamma = angle_cost - 2
75
- distance_cost = 2 - torch.exp(gamma * rho_x) - torch.exp(gamma * rho_y)
76
- omiga_w = torch.abs(w1 - w2) / torch.max(w1, w2)
77
- omiga_h = torch.abs(h1 - h2) / torch.max(h1, h2)
78
- shape_cost = torch.pow(1 - torch.exp(-1 * omiga_w), 4) + torch.pow(1 - torch.exp(-1 * omiga_h), 4)
79
- iou = iou - 0.5 * (distance_cost + shape_cost)
80
- loss = 1.0 - iou
81
-
82
- if self.reduction == 'sum':
83
- loss = loss.sum()
84
- elif self.reduction == 'mean':
85
- loss = loss.mean()
86
-
87
- return loss
88
-
89
-
90
- def pairwise_bbox_iou(box1, box2, box_format='xywh'):
91
- """Calculate iou.
92
- This code is based on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/utils/boxes.py
93
- """
94
- if box_format == 'xyxy':
95
- lt = torch.max(box1[:, None, :2], box2[:, :2])
96
- rb = torch.min(box1[:, None, 2:], box2[:, 2:])
97
- area_1 = torch.prod(box1[:, 2:] - box1[:, :2], 1)
98
- area_2 = torch.prod(box2[:, 2:] - box2[:, :2], 1)
99
-
100
- elif box_format == 'xywh':
101
- lt = torch.max(
102
- (box1[:, None, :2] - box1[:, None, 2:] / 2),
103
- (box2[:, :2] - box2[:, 2:] / 2),
104
- )
105
- rb = torch.min(
106
- (box1[:, None, :2] + box1[:, None, 2:] / 2),
107
- (box2[:, :2] + box2[:, 2:] / 2),
108
- )
109
-
110
- area_1 = torch.prod(box1[:, 2:], 1)
111
- area_2 = torch.prod(box2[:, 2:], 1)
112
- valid = (lt < rb).type(lt.type()).prod(dim=2)
113
- inter = torch.prod(rb - lt, 2) * valid
114
- return inter / (area_1[:, None] + area_2 - inter)
 
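A hedged sketch of calling IOUloss directly, assuming it is importable from a matching YOLOv6 checkout. Note the shape convention of the deleted code: box1 is passed already transposed ([4, M]) while box2 is [M, 4] and is transposed internally; the boxes are made up:

```python
import torch
from yolov6.utils.figure_iou import IOUloss

loss_fn = IOUloss(box_format='xyxy', iou_type='siou', reduction='mean')
pred = torch.tensor([[0., 0., 10., 10.],
                     [5., 5., 15., 15.]]).T      # shape [4, M]
target = torch.tensor([[1., 1., 11., 11.],
                       [6., 4., 16., 14.]])      # shape [M, 4]
print(loss_fn(pred, target))                     # scalar SIoU loss averaged over the two pairs
```
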
 
yolov6/utils/general.py DELETED
@@ -1,24 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- import os
4
- import glob
5
- from pathlib import Path
6
-
7
- def increment_name(path):
8
- "increase save directory's id"
9
- path = Path(path)
10
- sep = ''
11
- if path.exists():
12
- path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '')
13
- for n in range(1, 9999):
14
- p = f'{path}{sep}{n}{suffix}'
15
- if not os.path.exists(p):
16
- break
17
- path = Path(p)
18
- return path
19
-
20
-
21
- def find_latest_checkpoint(search_dir='.'):
22
- # Find the most recent saved checkpoint in search_dir
23
- checkpoint_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
24
- return max(checkpoint_list, key=os.path.getctime) if checkpoint_list else ''
 
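A short sketch of the run-directory helper above, assuming a matching YOLOv6 checkout is importable; it creates a dummy directory just to trigger the increment:

```python
from pathlib import Path
from yolov6.utils.general import increment_name

Path('runs/train/exp').mkdir(parents=True, exist_ok=True)   # simulate an existing run directory
print(increment_name('runs/train/exp'))                     # runs/train/exp1 (then exp2, ...)
```
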
 
yolov6/utils/nms.py DELETED
@@ -1,106 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
- # The code is based on
4
- # https://github.com/ultralytics/yolov5/blob/master/utils/general.py
5
-
6
- import os
7
- import time
8
- import numpy as np
9
- import cv2
10
- import torch
11
- import torchvision
12
-
13
-
14
- # Settings
15
- torch.set_printoptions(linewidth=320, precision=5, profile='long')
16
- np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
17
- cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
18
- os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8)) # NumExpr max threads
19
-
20
-
21
- def xywh2xyxy(x):
22
- # Convert boxes with shape [n, 4] from [x, y, w, h] to [x1, y1, x2, y2] where x1y1 is top-left, x2y2=bottom-right
23
- y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
24
- y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
25
- y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
26
- y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
27
- y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
28
- return y
29
-
30
-
31
- def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300):
32
- """Runs Non-Maximum Suppression (NMS) on inference results.
33
- This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775
34
- Args:
35
- prediction: (tensor), with shape [N, 5 + num_classes], N is the number of bboxes.
36
- conf_thres: (float) confidence threshold.
37
- iou_thres: (float) iou threshold.
38
- classes: (None or list[int]), if a list is provided, nms only keep the classes you provide.
39
- agnostic: (bool), when it is set to True, class-agnostic NMS is performed; otherwise NMS is done separately per class.
40
- multi_label: (bool), when it is set to True, one box can have multiple labels; otherwise, each box only has one label.
41
- max_det:(int), max number of output bboxes.
42
-
43
- Returns:
44
- list of detections, each item is one tensor with shape (num_boxes, 6), 6 is for [xyxy, conf, cls].
45
- """
46
-
47
- num_classes = prediction.shape[2] - 5 # number of classes
48
- pred_candidates = prediction[..., 4] > conf_thres # candidates
49
-
50
- # Check the parameters.
51
- assert 0 <= conf_thres <= 1, f'conf_thresh must be in 0.0 to 1.0, however {conf_thres} is provided.'
52
- assert 0 <= iou_thres <= 1, f'iou_thres must be in 0.0 to 1.0, however {iou_thres} is provided.'
53
-
54
- # Function settings.
55
- max_wh = 4096 # maximum box width and height
56
- max_nms = 30000 # maximum number of boxes put into torchvision.ops.nms()
57
- time_limit = 10.0 # quit the function when nms cost time exceed the limit time.
58
- multi_label &= num_classes > 1 # multiple labels per box
59
-
60
- tik = time.time()
61
- output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
62
- for img_idx, x in enumerate(prediction): # image index, image inference
63
- x = x[pred_candidates[img_idx]] # keep only boxes whose objectness exceeds conf_thres
64
-
65
- # If no boxes remain, skip this image.
66
- if not x.shape[0]:
67
- continue
68
-
69
- # scale class scores by objectness
70
- x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
71
-
72
- # (center x, center y, width, height) to (x1, y1, x2, y2)
73
- box = xywh2xyxy(x[:, :4])
74
-
75
- # Detections matrix's shape is (n,6), each row represents (xyxy, conf, cls)
76
- if multi_label:
77
- box_idx, class_idx = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
78
- x = torch.cat((box[box_idx], x[box_idx, class_idx + 5, None], class_idx[:, None].float()), 1)
79
- else: # Only keep the class with the highest score.
80
- conf, class_idx = x[:, 5:].max(1, keepdim=True)
81
- x = torch.cat((box, conf, class_idx.float()), 1)[conf.view(-1) > conf_thres]
82
-
83
- # Filter by class, only keep boxes whose category is in classes.
84
- if classes is not None:
85
- x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
86
-
87
- # Check shape
88
- num_box = x.shape[0] # number of boxes
89
- if not num_box: # no boxes kept.
90
- continue
91
- elif num_box > max_nms: # more boxes than NMS can handle; keep only the top max_nms by confidence.
92
- x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
93
-
94
- # Batched NMS
95
- class_offset = x[:, 5:6] * (0 if agnostic else max_wh) # classes
96
- boxes, scores = x[:, :4] + class_offset, x[:, 4] # boxes (offset by class), scores
97
- keep_box_idx = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
98
- if keep_box_idx.shape[0] > max_det: # limit detections
99
- keep_box_idx = keep_box_idx[:max_det]
100
-
101
- output[img_idx] = x[keep_box_idx]
102
- if (time.time() - tik) > time_limit:
103
- print(f'WARNING: NMS took longer than the {time_limit}s time limit.')
104
- break # time limit exceeded
105
-
106
- return output
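To illustrate the deleted NMS routine, here is a minimal sketch that runs non_max_suppression on a fabricated prediction tensor; it assumes a checkout from before this commit, where yolov6/utils/nms.py still exists. Two heavily overlapping boxes collapse into one, and a low-objectness box is dropped by the confidence threshold.

import torch
from yolov6.utils.nms import non_max_suppression  # module removed by this commit

batch, num_boxes, num_classes = 1, 4, 3
pred = torch.zeros((batch, num_boxes, 5 + num_classes))  # rows: [cx, cy, w, h, obj, cls0, cls1, cls2]
pred[..., :4] = torch.tensor([[100., 100., 50., 80.],    # box A
                              [102., 101., 52., 78.],    # box B, almost identical to A
                              [300., 200., 40., 40.],    # box C, far away
                              [500., 400., 30., 30.]])   # box D
pred[..., 4] = torch.tensor([0.9, 0.8, 0.7, 0.1])        # objectness; box D falls below conf_thres
pred[..., 5] = 1.0                                       # every box scores highest on class 0

dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)[0]
print(dets)  # expected: 2 rows of [x1, y1, x2, y2, conf, cls] -- B suppressed by A, D filtered out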
 
yolov6/utils/torch_utils.py DELETED
@@ -1,110 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding:utf-8 -*-
3
-
4
- import time
5
- from contextlib import contextmanager
6
- from copy import deepcopy
7
- import torch
8
- import torch.distributed as dist
9
- import torch.nn as nn
10
- import torch.nn.functional as F
11
- from yolov6.utils.events import LOGGER
12
-
13
- try:
14
- import thop # for FLOPs computation
15
- except ImportError:
16
- thop = None
17
-
18
-
19
- @contextmanager
20
- def torch_distributed_zero_first(local_rank: int):
21
- """
22
- Context manager that makes all other processes in distributed training wait until the local master has finished its task.
23
- """
24
- if local_rank not in [-1, 0]:
25
- dist.barrier(device_ids=[local_rank])
26
- yield
27
- if local_rank == 0:
28
- dist.barrier(device_ids=[0])
29
-
30
-
31
- def time_sync():
32
- # Wait for all kernels in all streams on the CUDA device to complete (if CUDA is available), then return the current time.
33
- if torch.cuda.is_available():
34
- torch.cuda.synchronize()
35
- return time.time()
36
-
37
-
38
- def initialize_weights(model):
39
- for m in model.modules():
40
- t = type(m)
41
- if t is nn.Conv2d:
42
- pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
43
- elif t is nn.BatchNorm2d:
44
- m.eps = 1e-3
45
- m.momentum = 0.03
46
- elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
47
- m.inplace = True
48
-
49
-
50
- def fuse_conv_and_bn(conv, bn):
51
- # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
52
- fusedconv = (
53
- nn.Conv2d(
54
- conv.in_channels,
55
- conv.out_channels,
56
- kernel_size=conv.kernel_size,
57
- stride=conv.stride,
58
- padding=conv.padding,
59
- groups=conv.groups,
60
- bias=True,
61
- )
62
- .requires_grad_(False)
63
- .to(conv.weight.device)
64
- )
65
-
66
- # prepare filters
67
- w_conv = conv.weight.clone().view(conv.out_channels, -1)
68
- w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
69
- fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
70
-
71
- # prepare spatial bias
72
- b_conv = (
73
- torch.zeros(conv.weight.size(0), device=conv.weight.device)
74
- if conv.bias is None
75
- else conv.bias
76
- )
77
- b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(
78
- torch.sqrt(bn.running_var + bn.eps)
79
- )
80
- fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
81
-
82
- return fusedconv
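A quick numerical check of the fusion above, as a sketch assuming a pre-deletion checkout: with BatchNorm in eval mode, the fused convolution should reproduce conv followed by batchnorm to within floating-point tolerance.

import torch
import torch.nn as nn
from yolov6.utils.torch_utils import fuse_conv_and_bn  # module removed by this commit

conv = nn.Conv2d(8, 16, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(16).eval()       # fusion matches BN in inference mode only
bn.running_mean.uniform_(-1.0, 1.0)  # give BN non-trivial running statistics
bn.running_var.uniform_(0.5, 2.0)

fused = fuse_conv_and_bn(conv, bn)
x = torch.randn(2, 8, 32, 32)
with torch.no_grad():
    print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))  # expected: True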
83
-
84
-
85
- def fuse_model(model):
86
- from yolov6.layers.common import Conv
87
-
88
- for m in model.modules():
89
- if type(m) is Conv and hasattr(m, "bn"):
90
- m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
91
- delattr(m, "bn") # remove batchnorm
92
- m.forward = m.forward_fuse # update forward
93
- return model
94
-
95
-
96
- def get_model_info(model, img_size=640):
97
- """Get model Params and GFlops.
98
- Code based on https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/utils/model_utils.py
99
- """
100
- from thop import profile
101
- stride = 32
102
- img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device)
103
-
104
- flops, params = profile(deepcopy(model), inputs=(img,), verbose=False)
105
- params /= 1e6
106
- flops /= 1e9
107
- img_size = img_size if isinstance(img_size, list) else [img_size, img_size]
108
- flops *= img_size[0] * img_size[1] / stride / stride * 2 # Gflops
109
- info = "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops)
110
- return info
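Finally, a short sketch of how these utilities fit together when preparing a network for inference, again assuming a pre-deletion checkout; get_model_info additionally needs the optional thop package. Note that fuse_model only folds BatchNorm layers inside yolov6.layers.common.Conv blocks, so on the toy Sequential below it is a no-op and is shown purely for the call pattern.

import torch
import torch.nn as nn
from yolov6.utils.torch_utils import fuse_model, get_model_info, time_sync  # module removed by this commit

# Toy stand-in model; a real YOLOv6 model would have the BN of each Conv block folded away here.
model = nn.Sequential(nn.Conv2d(3, 16, 3, 2, 1), nn.BatchNorm2d(16), nn.SiLU(),
                      nn.Conv2d(16, 32, 3, 2, 1)).eval()
model = fuse_model(model)

print(get_model_info(model, img_size=640))  # prints "Params: ...M, Gflops: ..."

t0 = time_sync()
with torch.no_grad():
    model(torch.zeros(1, 3, 640, 640))
print(f'forward latency: {time_sync() - t0:.3f}s')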