Bhaskar Saranga committed
Commit
59c3a37
1 Parent(s): fd7f189

Added AsOne

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full changeset.
Files changed (50):
  1. .dockerignore +9 -0
  2. Dockerfile +36 -0
  3. app.py +44 -6
  4. asone/__init__.py +114 -0
  5. asone/asone.py +195 -0
  6. asone/demo_detector.py +85 -0
  7. asone/demo_tracker.py +101 -0
  8. asone/detectors/__init__.py +13 -0
  9. asone/detectors/detector.py +92 -0
  10. asone/detectors/utils/__init__.py +0 -0
  11. asone/detectors/utils/cfg_path.py +18 -0
  12. asone/detectors/utils/exp_name.py +32 -0
  13. asone/detectors/utils/weights_path.py +117 -0
  14. asone/detectors/yolor/__init__.py +2 -0
  15. asone/detectors/yolor/cfg/yolor_csp.cfg +1376 -0
  16. asone/detectors/yolor/cfg/yolor_csp_x.cfg +1576 -0
  17. asone/detectors/yolor/cfg/yolor_p6.cfg +1760 -0
  18. asone/detectors/yolor/models/__init__.py +1 -0
  19. asone/detectors/yolor/models/common.py +1023 -0
  20. asone/detectors/yolor/models/export.py +68 -0
  21. asone/detectors/yolor/models/models.py +761 -0
  22. asone/detectors/yolor/utils/__init__.py +1 -0
  23. asone/detectors/yolor/utils/activations.py +72 -0
  24. asone/detectors/yolor/utils/autoanchor.py +152 -0
  25. asone/detectors/yolor/utils/datasets.py +1297 -0
  26. asone/detectors/yolor/utils/export.py +80 -0
  27. asone/detectors/yolor/utils/general.py +449 -0
  28. asone/detectors/yolor/utils/google_utils.py +132 -0
  29. asone/detectors/yolor/utils/layers.py +532 -0
  30. asone/detectors/yolor/utils/loss.py +173 -0
  31. asone/detectors/yolor/utils/metrics.py +140 -0
  32. asone/detectors/yolor/utils/parse_config.py +71 -0
  33. asone/detectors/yolor/utils/plots.py +380 -0
  34. asone/detectors/yolor/utils/torch_utils.py +240 -0
  35. asone/detectors/yolor/utils/yolor_utils.py +206 -0
  36. asone/detectors/yolor/yolor_detector.py +138 -0
  37. asone/detectors/yolov5/__init__.py +2 -0
  38. asone/detectors/yolov5/yolov5/__init__.py +0 -0
  39. asone/detectors/yolov5/yolov5/models/__init__.py +3 -0
  40. asone/detectors/yolov5/yolov5/models/common.py +756 -0
  41. asone/detectors/yolov5/yolov5/models/experimental.py +56 -0
  42. asone/detectors/yolov5/yolov5/models/general.py +1036 -0
  43. asone/detectors/yolov5/yolov5/models/yolo.py +345 -0
  44. asone/detectors/yolov5/yolov5/utils/__init__.py +0 -0
  45. asone/detectors/yolov5/yolov5/utils/torch_utils.py +354 -0
  46. asone/detectors/yolov5/yolov5/utils/yolov5_utils.py +222 -0
  47. asone/detectors/yolov5/yolov5_detector.py +121 -0
  48. asone/detectors/yolov6/__init__.py +2 -0
  49. asone/detectors/yolov6/yolov6/__init__.py +0 -0
  50. asone/detectors/yolov6/yolov6/assigners/__init__.py +2 -0
.dockerignore ADDED
@@ -0,0 +1,9 @@
+ .env/
+ results/
+ **__pycache__**
+ *.onnx
+ *.pt
+ **byte_track_results**
+ **deep_sort_results**
+ **nor_fair_results**
+ test_env/
Dockerfile ADDED
@@ -0,0 +1,36 @@
+ FROM pytorch/pytorch:latest
+
+ # Set Time Zone to prevent issues for installing some apt packages
+ ENV TZ=Europe/Minsk
+ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+ # install apt packages
+ RUN apt-get update -y
+ RUN apt-get install git gcc \
+     g++ python3-opencv \
+     vim -y
+
+ RUN mkdir /app
+ WORKDIR /app
+
+ ADD asone asone
+
+ ADD sample_videos sample_videos
+ ADD main.py main.py
+ # ADD demo.py demo.py
+
+ ADD setup.py setup.py
+ ADD requirements.txt requirements.txt
+
+ RUN pip3 install Cython numpy
+ RUN pip3 install cython-bbox
+ ADD pypi_README.md pypi_README.md
+
+ RUN pip3 install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
+ RUN pip3 install .
+
+ WORKDIR /workspace
+ # Entry Point
+ CMD /bin/bash
app.py CHANGED
@@ -1,3 +1,4 @@
+ import os
  import torch
  import gradio as gr
  import cv2
@@ -11,6 +12,8 @@ from utils.plots import plot_one_box
  from utils.torch_utils import time_synchronized
  import time
  from ultralytics import YOLO
+ import asone
+ from asone import ASOne

  def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
      # Resize and pad image while meeting stride-multiple constraints
@@ -173,7 +176,31 @@ def inference2(video,model_link,iou_threshold,confidence_threshold):
      finalVideo.release()
      return 'output.mp4',np.mean(fps_video)

-
+ def inference3(video,model_link,iou_threshold,confidence_threshold):
+     model_path = 'weights/'+str(model_link)+'.pt'
+     device = torch.cuda.is_available()
+     dt_obj = ASOne(
+         tracker=asone.BYTETRACK,
+         detector=asone.YOLOV8M_PYTORCH,
+         weights=model_path,
+         use_cuda=device
+     )
+     track_fn = dt_obj.track_video(video,
+                                   conf_thres=confidence_threshold,
+                                   iou_thres=iou_threshold,
+                                   display=False,
+                                   draw_trails=None,
+                                   filter_classes=None,
+                                   class_names=None)  # class_names=['License Plate'] for custom weights
+     fps_a = []
+     for bbox_details, frame_details in track_fn:
+         # bbox_xyxy, ids, scores, class_ids = bbox_details
+         frame, frame_num, fps = frame_details
+         fps_a.append(fps)
+         print(frame_num)
+
+     file_name = os.path.basename(video)
+     return f'data/results/{file_name}', np.mean(fps_a)

  examples_images = ['data/images/1.jpg',
                     'data/images/2.jpg',
@@ -206,11 +233,19 @@ with gr.Blocks() as demo:
  video_iou_threshold = gr.Slider(label="IOU Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.45)
  video_conf_threshold = gr.Slider(label="Confidence Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.25)
  gr.Examples(examples=examples_videos,inputs=video_input,outputs=video_output)
- video_button = gr.Button("Detect")
-
- # with gr.Tab("Webcam Video"):
- # gr.Markdown("## YOLOv7 Inference on Webcam Video")
- # gr.Markdown("Coming Soon")
+ video_button = gr.Button("Detect")
+ with gr.Tab("Tracking"):
+     gr.Markdown("## Multi object tracking")
+
+     with gr.Row():
+         track_input = gr.Video(type='pil', label="Input Video", source="upload")
+         track_output = gr.Video(type="pil", label="Output Video",format="mp4")
+         track_fps_video = gr.Number(0,label='FPS')
+     track_drop = gr.Dropdown(choices=models,value=models[0])
+     track_iou_threshold = gr.Slider(label="IOU Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.45)
+     track_conf_threshold = gr.Slider(label="Confidence Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.25)
+     gr.Examples(examples=examples_videos,inputs=track_input,outputs=track_output)
+     track_button = gr.Button("Detect")

  text_button.click(inference, inputs=[image_input,image_drop,
                    image_iou_threshold,image_conf_threshold],
@@ -218,5 +253,8 @@ with gr.Blocks() as demo:
  video_button.click(inference2, inputs=[video_input,video_drop,
                     video_iou_threshold,video_conf_threshold],
                     outputs=[video_output,fps_video])
+ track_button.click(inference3, inputs=[track_input,track_drop,
+                    track_iou_threshold,track_conf_threshold],
+                    outputs=[track_output,track_fps_video])

  demo.launch(debug=True,enable_queue=True)
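For reference, track_video() yields a (bbox_details, frame_details) pair per frame; inference3 above only unpacks frame_details. A minimal sketch of consuming both tuples outside Gradio, assuming the asone package is installed and that 'sample_videos/test.mp4' is a hypothetical sample path:

    import asone
    from asone import ASOne

    # Any detector/tracker pair exported by asone works here; BYTETRACK + YOLOv7 is one choice.
    dt_obj = ASOne(tracker=asone.BYTETRACK, detector=asone.YOLOV7_PYTORCH, use_cuda=False)

    for bbox_details, frame_details in dt_obj.track_video('sample_videos/test.mp4',
                                                          conf_thres=0.25,
                                                          iou_thres=0.45,
                                                          display=False,
                                                          save_result=False):
        bbox_xyxy, ids, scores, class_ids = bbox_details   # tracked boxes for this frame
        frame, frame_num, fps = frame_details
        print(frame_num, len(bbox_xyxy))                   # e.g. objects tracked per frame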
asone/__init__.py ADDED
@@ -0,0 +1,114 @@
+ from .asone import ASOne
+ import asone.detectors
+ import asone.trackers
+
+
+ BYTETRACK = 0
+ DEEPSORT = 1
+ NORFAIR = 2
+
+
+ YOLOV5X6_PYTORCH = 0
+ YOLOV5S_PYTORCH = 2
+ YOLOV5N_PYTORCH = 4
+ YOLOV5M_PYTORCH = 6
+ YOLOV5L_PYTORCH = 8
+ YOLOV5X_PYTORCH = 10
+ YOLOV5N6_PYTORCH = 12
+ YOLOV5S6_PYTORCH = 14
+ YOLOV5M6_PYTORCH = 16
+ YOLOV5L6_PYTORCH = 18
+
+
+ YOLOV6N_PYTORCH = 20
+ YOLOV6T_PYTORCH = 22
+ YOLOV6S_PYTORCH = 24
+ YOLOV6M_PYTORCH = 26
+ YOLOV6L_PYTORCH = 28
+ YOLOV6L_RELU_PYTORCH = 30
+ YOLOV6S_REPOPT_PYTORCH = 32
+
+ YOLOV7_TINY_PYTORCH = 34
+ YOLOV7_PYTORCH = 36
+ YOLOV7_X_PYTORCH = 38
+ YOLOV7_W6_PYTORCH = 40
+ YOLOV7_E6_PYTORCH = 42
+ YOLOV7_D6_PYTORCH = 44
+ YOLOV7_E6E_PYTORCH = 46
+
+ YOLOR_CSP_X_PYTORCH = 48
+ YOLOR_CSP_X_STAR_PYTORCH = 50
+ YOLOR_CSP_STAR_PYTORCH = 52
+ YOLOR_CSP_PYTORCH = 54
+ YOLOR_P6_PYTORCH = 56
+
+ YOLOX_L_PYTORCH = 58
+ YOLOX_NANO_PYTORCH = 60
+ YOLOX_TINY_PYTORCH = 62
+ YOLOX_DARKNET_PYTORCH = 64
+ YOLOX_S_PYTORCH = 66
+ YOLOX_M_PYTORCH = 68
+ YOLOX_X_PYTORCH = 70
+
+ # ONNX
+ YOLOV5X6_ONNX = 1
+ YOLOV5S_ONNX = 3
+ YOLOV5N_ONNX = 5
+ YOLOV5M_ONNX = 7
+ YOLOV5L_ONNX = 9
+ YOLOV5X_ONNX = 11
+ YOLOV5N6_ONNX = 13
+ YOLOV5S6_ONNX = 15
+ YOLOV5M6_ONNX = 17
+ YOLOV5L6_ONNX = 19
+
+ YOLOV6N_ONNX = 21
+ YOLOV6T_ONNX = 23
+ YOLOV6S_ONNX = 25
+ YOLOV6M_ONNX = 27
+ YOLOV6L_ONNX = 29
+ YOLOV6L_RELU_ONNX = 31
+ YOLOV6S_REPOPT_ONNX = 33
+
+ YOLOV7_TINY_ONNX = 35
+ YOLOV7_ONNX = 37
+ YOLOV7_X_ONNX = 39
+ YOLOV7_W6_ONNX = 41
+ YOLOV7_E6_ONNX = 43
+ YOLOV7_D6_ONNX = 45
+ YOLOV7_E6E_ONNX = 47
+
+ YOLOR_CSP_X_ONNX = 49
+ YOLOR_CSP_X_STAR_ONNX = 51
+ YOLOR_CSP_STAR_ONNX = 53
+ YOLOR_CSP_ONNX = 55
+ YOLOR_P6_ONNX = 57
+
+ YOLOX_L_ONNX = 59
+ YOLOX_NANO_ONNX = 61
+ YOLOX_TINY_ONNX = 63
+ YOLOX_DARKNET_ONNX = 65
+ YOLOX_S_ONNX = 67
+ YOLOX_M_ONNX = 69
+ YOLOX_X_ONNX = 71
+
+ # YOLOv8
+ YOLOV8N_PYTORCH = 72
+ YOLOV8N_ONNX = 73
+ YOLOV8S_PYTORCH = 74
+ YOLOV8S_ONNX = 75
+ YOLOV8M_PYTORCH = 76
+ YOLOV8M_ONNX = 77
+ YOLOV8L_PYTORCH = 78
+ YOLOV8L_ONNX = 79
+ YOLOV8X_PYTORCH = 80
+ YOLOV8X_ONNX = 81
+
+ __all__ = ['ASOne', 'detectors', 'trackers']
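The integer flags above follow a fixed convention: each PyTorch model gets an even id and its ONNX export the next odd id, which is what the weight-path helper later uses to decide which backend to load. A short illustration, assuming the package is installed:

    import asone

    # Even flag = PyTorch weights, odd flag = the matching ONNX export.
    print(asone.YOLOV7_PYTORCH, asone.YOLOV7_ONNX)      # 36 37
    print(asone.YOLOX_S_PYTORCH, asone.YOLOX_S_ONNX)    # 66 67
    assert asone.YOLOV5S_ONNX == asone.YOLOV5S_PYTORCH + 1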
asone/asone.py ADDED
@@ -0,0 +1,195 @@
1
+ import copy
2
+ import cv2
3
+ from loguru import logger
4
+ import os
5
+ import time
6
+ import asone.utils as utils
7
+ from asone.trackers import Tracker
8
+ from asone.detectors import Detector
9
+ from asone.utils.default_cfg import config
10
+ import numpy as np
11
+
12
+ class ASOne:
13
+ def __init__(self,
14
+ detector: int = 0,
15
+ tracker: int = -1,
16
+ weights: str = None,
17
+ use_cuda: bool = True) -> None:
18
+
19
+ self.use_cuda = use_cuda
20
+
21
+ # get detector object
22
+ self.detector = self.get_detector(detector, weights)
23
+
24
+ if tracker == -1:
25
+ self.tracker = None
26
+ return
27
+
28
+ self.tracker = self.get_tracker(tracker)
29
+
30
+
31
+ def get_detector(self, detector: int, weights: str):
32
+ detector = Detector(detector, weights=weights,
33
+ use_cuda=self.use_cuda).get_detector()
34
+ return detector
35
+
36
+ def get_tracker(self, tracker: int):
37
+
38
+ tracker = Tracker(tracker, self.detector,
39
+ use_cuda=self.use_cuda)
40
+ return tracker
41
+
42
+ def _update_args(self, kwargs):
43
+ for key, value in kwargs.items():
44
+ if key in config.keys():
45
+ config[key] = value
46
+ else:
47
+ print(f'"{key}" argument not found! valid args: {list(config.keys())}')
48
+ exit()
49
+ return config
50
+
51
+ def track_stream(self,
52
+ stream_url,
53
+ **kwargs
54
+ ):
55
+
56
+ output_filename = 'result.mp4'
57
+ kwargs['filename'] = output_filename
58
+ config = self._update_args(kwargs)
59
+
60
+ for (bbox_details, frame_details) in self._start_tracking(stream_url, config):
61
+ # yeild bbox_details, frame_details to main script
62
+ yield bbox_details, frame_details
63
+
64
+
65
+ def track_video(self,
66
+ video_path,
67
+ **kwargs
68
+ ):
69
+ output_filename = os.path.basename(video_path)
70
+ kwargs['filename'] = output_filename
71
+ config = self._update_args(kwargs)
72
+
73
+ for (bbox_details, frame_details) in self._start_tracking(video_path, config):
74
+ # yeild bbox_details, frame_details to main script
75
+ yield bbox_details, frame_details
76
+
77
+ def detect(self, source, **kwargs)->np.ndarray:
78
+ """ Function to perform detection on an img
79
+
80
+ Args:
81
+ source (_type_): if str read the image. if nd.array pass it directly to detect
82
+
83
+ Returns:
84
+ _type_: ndarray of detection
85
+ """
86
+ if isinstance(source, str):
87
+ source = cv2.imread(source)
88
+ return self.detector.detect(source, **kwargs)
89
+
90
+ def track_webcam(self,
91
+ cam_id=0,
92
+ **kwargs):
93
+ output_filename = 'results.mp4'
94
+
95
+ kwargs['filename'] = output_filename
96
+ kwargs['fps'] = 29
97
+ config = self._update_args(kwargs)
98
+
99
+
100
+ for (bbox_details, frame_details) in self._start_tracking(cam_id, config):
101
+ # yeild bbox_details, frame_details to main script
102
+ yield bbox_details, frame_details
103
+
104
+ def _start_tracking(self,
105
+ stream_path: str,
106
+ config: dict) -> tuple:
107
+ if not self.tracker:
108
+ print(f'No tracker is selected. use detect() function perform detcetion or pass a tracker.')
109
+ exit()
110
+
111
+ fps = config.pop('fps')
112
+ output_dir = config.pop('output_dir')
113
+ filename = config.pop('filename')
114
+ save_result = config.pop('save_result')
115
+ display = config.pop('display')
116
+ draw_trails = config.pop('draw_trails')
117
+ class_names = config.pop('class_names')
118
+
119
+ cap = cv2.VideoCapture(stream_path)
120
+ width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
121
+ height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
122
+ frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
123
+
124
+ if fps is None:
125
+ fps = cap.get(cv2.CAP_PROP_FPS)
126
+
127
+ if save_result:
128
+ os.makedirs(output_dir, exist_ok=True)
129
+ save_path = os.path.join(output_dir, filename)
130
+ logger.info(f"video save path is {save_path}")
131
+
132
+ video_writer = cv2.VideoWriter(
133
+ save_path,
134
+ cv2.VideoWriter_fourcc(*"mp4v"),
135
+ fps,
136
+ (int(width), int(height)),
137
+ )
138
+
139
+ frame_id = 1
140
+ tic = time.time()
141
+
142
+ prevTime = 0
143
+
144
+ while True:
145
+ start_time = time.time()
146
+
147
+ ret, frame = cap.read()
148
+ if not ret:
149
+ break
150
+ im0 = copy.deepcopy(frame)
151
+
152
+ bboxes_xyxy, ids, scores, class_ids = self.tracker.detect_and_track(
153
+ frame, config)
154
+ elapsed_time = time.time() - start_time
155
+
156
+ logger.info(
157
+ 'frame {}/{} ({:.2f} ms)'.format(frame_id, int(frame_count),
158
+ elapsed_time * 1000))
159
+
160
+ im0 = utils.draw_boxes(im0,
161
+ bboxes_xyxy,
162
+ class_ids,
163
+ identities=ids,
164
+ draw_trails=draw_trails,
165
+ class_names=class_names)
166
+
167
+ currTime = time.time()
168
+ fps = 1 / (currTime - prevTime)
169
+ prevTime = currTime
170
+ cv2.line(im0, (20, 25), (127, 25), [85, 45, 255], 30)
171
+ cv2.putText(im0, f'FPS: {int(fps)}', (11, 35), 0, 1, [
172
+ 225, 255, 255], thickness=2, lineType=cv2.LINE_AA)
173
+
174
+ if display:
175
+ cv2.imshow(' Sample', im0)
176
+ if save_result:
177
+ video_writer.write(im0)
178
+
179
+ frame_id += 1
180
+
181
+ if cv2.waitKey(25) & 0xFF == ord('q'):
182
+ break
183
+
184
+ # yeild required values in form of (bbox_details, frames_details)
185
+ yield (bboxes_xyxy, ids, scores, class_ids), (im0 if display else frame, frame_id-1, fps)
186
+
187
+ tac = time.time()
188
+ print(f'Total Time Taken: {tac - tic:.2f}')
189
+
190
+ if __name__ == '__main__':
191
+ # asone = ASOne(tracker='norfair')
192
+ asone = ASOne()
193
+
194
+ asone.start_tracking('data/sample_videos/video2.mp4',
195
+ save_result=True, display=False)
asone/demo_detector.py ADDED
@@ -0,0 +1,85 @@
1
+ import asone
2
+ from asone import ASOne
3
+ from .utils import draw_boxes
4
+ import cv2
5
+ import argparse
6
+ import time
7
+ import os
8
+
9
+ def main(args):
10
+ filter_classes = args.filter_classes
11
+ video_path = args.video
12
+
13
+ os.makedirs(args.output_path, exist_ok=True)
14
+
15
+ if filter_classes:
16
+ filter_classes = filter_classes.split(',')
17
+
18
+
19
+ detector = ASOne(asone.YOLOV7_PYTORCH, weights=args.weights, use_cuda=args.use_cuda)
20
+
21
+ cap = cv2.VideoCapture(video_path)
22
+ width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
23
+ height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
24
+ FPS = cap.get(cv2.CAP_PROP_FPS)
25
+
26
+ if args.save:
27
+ video_writer = cv2.VideoWriter(
28
+ os.path.basename(video_path),
29
+ cv2.VideoWriter_fourcc(*"mp4v"),
30
+ FPS,
31
+ (int(width), int(height)),
32
+ )
33
+
34
+ frame_no = 1
35
+ tic = time.time()
36
+
37
+ prevTime = 0
38
+
39
+ while True:
40
+ start_time = time.time()
41
+
42
+ ret, img = cap.read()
43
+ if not ret:
44
+ break
45
+ frame = img.copy()
46
+
47
+ dets, img_info = detector.detect(img, conf_thres=0.25, iou_thres=0.45)
48
+ currTime = time.time()
49
+ fps = 1 / (currTime - prevTime)
50
+ prevTime = currTime
51
+
52
+ if dets is not None:
53
+ bbox_xyxy = dets[:, :4]
54
+ scores = dets[:, 4]
55
+ class_ids = dets[:, 5]
56
+ img = draw_boxes(img, bbox_xyxy, class_ids=class_ids)
57
+
58
+ cv2.line(img, (20, 25), (127, 25), [85, 45, 255], 30)
59
+ cv2.putText(img, f'FPS: {int(fps)}', (11, 35), 0, 1, [
60
+ 225, 255, 255], thickness=2, lineType=cv2.LINE_AA)
61
+
62
+
63
+ frame_no+=1
64
+ if args.display:
65
+ cv2.imshow('Window', img)
66
+
67
+ if args.save:
68
+ video_writer.write(img)
69
+
70
+ if cv2.waitKey(25) & 0xFF == ord('q'):
71
+ break
72
+
73
+ if __name__=='__main__':
74
+
75
+ parser = argparse.ArgumentParser()
76
+ parser.add_argument("video", help="Path of video")
77
+ parser.add_argument('--cpu', default=True, action='store_false', dest='use_cuda', help='If provided the model will run on cpu otherwise it will run on gpu')
78
+ parser.add_argument('--filter_classes', default=None, help='Class names seperated by comma (,). e.g. person,car ')
79
+ parser.add_argument('-w', '--weights', default=None, help='Path of trained weights')
80
+ parser.add_argument('-o', '--output_path', default='data/results', help='path of output file')
81
+ parser.add_argument('--no_display', action='store_false', default=True, dest='display', help='if provided video will not be displayed')
82
+ parser.add_argument('--no_save', action='store_false', default=True, dest='save', help='if provided video will not be saved')
83
+
84
+ args = parser.parse_args()
85
+ main(args)
asone/demo_tracker.py ADDED
@@ -0,0 +1,101 @@
1
+ import argparse
2
+ from .trackers import Tracker
3
+ import argparse
4
+ import asone
5
+ from .utils import draw_boxes
6
+ from .detectors import Detector
7
+ import cv2
8
+ import os
9
+ from loguru import logger
10
+ import time
11
+ import copy
12
+
13
+ def main(args):
14
+ filter_classes = args.filter_classes
15
+
16
+ if filter_classes:
17
+ filter_classes = filter_classes.split(',')
18
+
19
+ detector = Detector(asone.YOLOV7_E6_ONNX, weights=args.weights, use_cuda=args.use_cuda).get_detector()
20
+ tracker = Tracker(asone.BYTETRACK, detector, use_cuda=args.use_cuda).get_tracker()
21
+
22
+ cap = cv2.VideoCapture(args.video_path)
23
+ width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
24
+ height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
25
+ fps = cap.get(cv2.CAP_PROP_FPS)
26
+ frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
27
+ output_dir = 'data/results'
28
+ if args.save_results:
29
+ os.makedirs(output_dir, exist_ok=True)
30
+ save_path = os.path.join(output_dir, os.path.basename(args.video_path))
31
+ logger.info(f"video save path is {save_path}")
32
+
33
+ video_writer = cv2.VideoWriter(
34
+ save_path,
35
+ cv2.VideoWriter_fourcc(*"mp4v"),
36
+ fps,
37
+ (int(width), int(height)),
38
+ )
39
+
40
+ frame_id = 1
41
+ tic = time.time()
42
+
43
+ prevTime = 0
44
+
45
+ while True:
46
+ start_time = time.time()
47
+
48
+ ret, frame = cap.read()
49
+ if not ret:
50
+ break
51
+ im0 = copy.deepcopy(frame)
52
+
53
+ bboxes_xyxy, ids, scores, class_ids = tracker.detect_and_track(
54
+ frame, filter_classes=filter_classes)
55
+
56
+ elapsed_time = time.time() - start_time
57
+
58
+ logger.info(
59
+ f'frame {frame_id}/{int(frame_count)} {elapsed_time * 1000:.2f} ms')
60
+
61
+ im0 = draw_boxes(im0, bboxes_xyxy, class_ids, identities=ids)
62
+
63
+ currTime = time.time()
64
+ fps = 1 / (currTime - prevTime)
65
+ prevTime = currTime
66
+ cv2.line(im0, (20, 25), (127, 25), [85, 45, 255], 30)
67
+ cv2.putText(im0, f'FPS: {int(fps)}', (11, 35), 0, 1, [
68
+ 225, 255, 255], thickness=2, lineType=cv2.LINE_AA)
69
+
70
+ if args.display:
71
+ cv2.imshow(' Sample', im0)
72
+ if args.save_results:
73
+ video_writer.write(im0)
74
+
75
+ frame_id += 1
76
+
77
+ if cv2.waitKey(25) & 0xFF == ord('q'):
78
+ break
79
+
80
+ tac = time.time()
81
+ print(f'Total Time Taken: {tac - tic:.2f}')
82
+
83
+
84
+
85
+ if __name__ == '__main__':
86
+ parser = argparse.ArgumentParser()
87
+
88
+ parser.add_argument('video_path', help='Path to input video')
89
+ parser.add_argument('--cpu', default=True,
90
+ action='store_false', dest='use_cuda', help='run on cpu')
91
+ parser.add_argument('--no_display', default=True,
92
+ action='store_false', dest='display', help='Disable display')
93
+ parser.add_argument('--no_save', default=True,
94
+ action='store_false', dest='save_results', help='Disable result saving')
95
+
96
+ parser.add_argument('--filter_classes', default=None, help='Class names seperated by comma (,). e.g. person,car ')
97
+ parser.add_argument('-w', '--weights', default=None, help='Path of trained weights')
98
+
99
+ args = parser.parse_args()
100
+
101
+ main(args)
asone/detectors/__init__.py ADDED
@@ -0,0 +1,13 @@
+ from asone.detectors.yolov5 import YOLOv5Detector
+ from asone.detectors.yolov6 import YOLOv6Detector
+ from asone.detectors.yolov7 import YOLOv7Detector
+ from asone.detectors.yolor import YOLOrDetector
+ from asone.detectors.yolox import YOLOxDetector
+
+ from asone.detectors.detector import Detector
+ __all__ = ['Detector',
+            'YOLOv5Detector',
+            'YOLOv6Detector',
+            'YOLOv7Detector',
+            'YOLOrDetector',
+            'YOLOxDetector']
asone/detectors/detector.py ADDED
@@ -0,0 +1,92 @@
1
+ import cv2
2
+
3
+ from asone.detectors.yolov5 import YOLOv5Detector
4
+ from asone.detectors.yolov6 import YOLOv6Detector
5
+ from asone.detectors.yolov7 import YOLOv7Detector
6
+ from asone.detectors.yolor import YOLOrDetector
7
+ from asone.detectors.yolox import YOLOxDetector
8
+
9
+ from asone.detectors.utils.weights_path import get_weight_path
10
+ from asone.detectors.utils.cfg_path import get_cfg_path
11
+ from asone.detectors.utils.exp_name import get_exp__name
12
+ from .yolov8 import YOLOv8Detector
13
+
14
+
15
+ class Detector:
16
+ def __init__(self,
17
+ model_flag: int,
18
+ weights: str = None,
19
+ use_cuda: bool = True):
20
+
21
+ self.model = self._select_detector(model_flag, weights, use_cuda)
22
+
23
+ def _select_detector(self, model_flag, weights, cuda):
24
+ # Get required weight using model_flag
25
+ if weights and weights.split('.')[-1] == 'onnx':
26
+ onnx = True
27
+ weight = weights
28
+ elif weights:
29
+ onnx = False
30
+ weight = weights
31
+ else:
32
+ onnx, weight = get_weight_path(model_flag)
33
+
34
+ if model_flag in range(0, 20):
35
+ _detector = YOLOv5Detector(weights=weight,
36
+ use_onnx=onnx,
37
+ use_cuda=cuda)
38
+ elif model_flag in range(20, 34):
39
+ _detector = YOLOv6Detector(weights=weight,
40
+ use_onnx=onnx,
41
+ use_cuda=cuda)
42
+ elif model_flag in range(34, 48):
43
+ _detector = YOLOv7Detector(weights=weight,
44
+ use_onnx=onnx,
45
+ use_cuda=cuda)
46
+ elif model_flag in range(48, 58):
47
+ # Get Configuration file for Yolor
48
+ if model_flag in range(48, 57, 2):
49
+ cfg = get_cfg_path(model_flag)
50
+ else:
51
+ cfg = None
52
+ _detector = YOLOrDetector(weights=weight,
53
+ cfg=cfg,
54
+ use_onnx=onnx,
55
+ use_cuda=cuda)
56
+
57
+ elif model_flag in range(58, 72):
58
+ # Get exp file and corresponding model for pytorch only
59
+ if model_flag in range(58, 71, 2):
60
+ exp, model_name = get_exp__name(model_flag)
61
+ else:
62
+ exp = model_name = None
63
+ _detector = YOLOxDetector(model_name=model_name,
64
+ exp_file=exp,
65
+ weights=weight,
66
+ use_onnx=onnx,
67
+ use_cuda=cuda)
68
+ elif model_flag in range(72, 82):
69
+ # Get exp file and corresponding model for pytorch only
70
+ _detector = YOLOv8Detector(weights=weight,
71
+ use_onnx=onnx,
72
+ use_cuda=cuda)
73
+
74
+ return _detector
75
+
76
+ def get_detector(self):
77
+ return self.model
78
+
79
+ def detect(self,
80
+ image: list,
81
+ **kwargs: dict):
82
+ return self.model.detect(image, **kwargs)
83
+
84
+
85
+ if __name__ == '__main__':
86
+
87
+ # Initialize YOLOv6 object detector
88
+ model_type = 56
89
+ result = Detector(model_flag=model_type, use_cuda=True)
90
+ img = cv2.imread('asone/asone-linux/test.jpeg')
91
+ pred = result.get_detector(img)
92
+ print(pred)
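The dispatch in _select_detector is driven purely by integer ranges plus the even/odd PyTorch-vs-ONNX convention. A hypothetical helper (not part of this commit) that summarises those same ranges:

    def describe_flag(model_flag: int) -> str:
        # Mirrors the ranges used in Detector._select_detector.
        families = [
            (range(0, 20),  'YOLOv5'),
            (range(20, 34), 'YOLOv6'),
            (range(34, 48), 'YOLOv7'),
            (range(48, 58), 'YOLOR'),
            (range(58, 72), 'YOLOX'),
            (range(72, 82), 'YOLOv8'),
        ]
        for flag_range, family in families:
            if model_flag in flag_range:
                kind = 'ONNX' if model_flag % 2 else 'PyTorch'
                return f'{family} ({kind})'
        raise ValueError(f'unknown model flag: {model_flag}')

    print(describe_flag(56))   # YOLOR (PyTorch)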
asone/detectors/utils/__init__.py ADDED
File without changes
asone/detectors/utils/cfg_path.py ADDED
@@ -0,0 +1,18 @@
+ import os
+
+ cfg_dir = os.path.dirname(os.path.dirname(__file__))
+
+ configuration = {'0': os.path.join(cfg_dir, 'yolor', 'cfg', 'yolor_csp_x.cfg'),
+                  '1': os.path.join(cfg_dir, 'yolor', 'cfg', 'yolor_csp.cfg'),
+                  '2': os.path.join(cfg_dir, 'yolor', 'cfg', 'yolor_p6.cfg')}
+
+ def get_cfg_path(model_flag):
+     if model_flag in [48, 50]:
+         cfg = configuration['0']
+     if model_flag in [52, 54]:
+         cfg = configuration['1']
+     if model_flag == 56:
+         cfg = configuration['2']
+     return cfg
asone/detectors/utils/exp_name.py ADDED
@@ -0,0 +1,32 @@
+ import os
+
+ exp_dir = os.path.dirname(os.path.dirname(__file__))
+
+ exp_file_name = {'58': (os.path.join(exp_dir, 'yolox', 'exps', 'yolox_l.py'), 'yolox-l'),
+                  '60': (os.path.join(exp_dir, 'yolox', 'exps', 'yolox_nano.py'), 'yolox-nano'),
+                  '62': (os.path.join(exp_dir, 'yolox', 'exps', 'yolox_tiny'), 'yolox-tiny'),
+                  '64': (os.path.join(exp_dir, 'yolox', 'exps', 'yolov3.py'), 'yolox-darknet'),
+                  '66': (os.path.join(exp_dir, 'yolox', 'exps', 'yolox_s.py'), 'yolox-s'),
+                  '68': (os.path.join(exp_dir, 'yolox', 'exps', 'yolox_m.py'), 'yolox-m'),
+                  '70': (os.path.join(exp_dir, 'yolox', 'exps', 'yolox_x.py'), 'yolox-x')
+                  }
+
+ def get_exp__name(model_flag):
+     if model_flag == 58:
+         exp, model_name = exp_file_name['58'][0], exp_file_name['58'][1]
+     elif model_flag == 60:
+         exp, model_name = exp_file_name['60'][0], exp_file_name['60'][1]
+     elif model_flag == 62:
+         exp, model_name = exp_file_name['62'][0], exp_file_name['62'][1]
+     elif model_flag == 64:
+         exp, model_name = exp_file_name['64'][0], exp_file_name['64'][1]
+     elif model_flag == 66:
+         exp, model_name = exp_file_name['66'][0], exp_file_name['66'][1]
+     elif model_flag == 68:
+         exp, model_name = exp_file_name['68'][0], exp_file_name['68'][1]
+     elif model_flag == 70:
+         exp, model_name = exp_file_name['70'][0], exp_file_name['70'][1]
+
+     return exp, model_name
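Every branch of get_exp__name indexes exp_file_name with str(model_flag), so the chain reduces to a single dictionary lookup; a possible simplification (not part of this commit, and it raises KeyError rather than UnboundLocalError for unknown flags):

    def get_exp__name(model_flag):
        # Same result for the supported flags, without the if/elif ladder.
        exp, model_name = exp_file_name[str(model_flag)]
        return exp, model_name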
asone/detectors/utils/weights_path.py ADDED
@@ -0,0 +1,117 @@
1
+ import os
2
+
3
+ weights = { '0': os.path.join('yolov5','weights','yolov5x6.pt'),
4
+ '1': os.path.join('yolov5','weights','yolov5x6.onnx'),
5
+ '2': os.path.join('yolov5','weights','yolov5s.pt'),
6
+ '3': os.path.join('yolov5','weights','yolov5s.onnx'),
7
+ '4': os.path.join('yolov5','weights','yolov5n.pt'),
8
+ '5': os.path.join('yolov5','weights','yolov5n.onnx'),
9
+ '6': os.path.join('yolov5','weights','yolov5m.pt'),
10
+ '7': os.path.join('yolov5','weights','yolov5m.onnx'),
11
+ '8': os.path.join('yolov5','weights','yolov5l.pt'),
12
+ '9': os.path.join('yolov5','weights','yolov5l.onnx'),
13
+ '10': os.path.join('yolov5','weights','yolov5x.pt'),
14
+ '11': os.path.join('yolov5','weights','yolov5x.onnx'),
15
+ '12': os.path.join('yolov5','weights','yolov5n6.pt'),
16
+ '13': os.path.join('yolov5','weights','yolov5n6.onnx'),
17
+ '14': os.path.join('yolov5','weights','yolov5s6.pt'),
18
+ '15': os.path.join('yolov5','weights','yolov5s6.onnx'),
19
+ '16': os.path.join('yolov5','weights','yolov5m6.pt'),
20
+ '17': os.path.join('yolov5','weights','yolov5m6.onnx'),
21
+ '18': os.path.join('yolov5','weights','yolov5l6.pt'),
22
+ '19': os.path.join('yolov5','weights','yolov5l6.onnx'),
23
+ # YOLOv6
24
+ '20': os.path.join('yolov6','weights','yolov6n.pt'),
25
+ '21': os.path.join('yolov6','weights','yolov6n.onnx'),
26
+ '22': os.path.join('yolov6','weights','yolov6t.pt'),
27
+ '23': os.path.join('yolov6','weights','yolov6t.onnx'),
28
+ '24': os.path.join('yolov6','weights','yolov6s.pt'),
29
+ '25': os.path.join('yolov6','weights','yolov6s.onnx'),
30
+ '26': os.path.join('yolov6','weights','yolov6m.pt'),
31
+ '27': os.path.join('yolov6','weights','yolov6m.onnx'),
32
+ '28': os.path.join('yolov6','weights','yolov6l.pt'),
33
+ '29': os.path.join('yolov6','weights','yolov6l.onnx'),
34
+ '30': os.path.join('yolov6','weights','yolov6l_relu.pt'),
35
+ '31': os.path.join('yolov6','weights','yolov6l_relu.onnx'),
36
+ '32': os.path.join('yolov6','weights','yolov6s_repopt.pt'),
37
+ '33': os.path.join('yolov6','weights','yolov6s_repopt.onnx'),
38
+ # YOLOv7
39
+ '34': os.path.join('yolov7','weights','yolov7-tiny.pt'),
40
+ '35': os.path.join('yolov7','weights','yolov7-tiny.onnx'),
41
+ '36': os.path.join('yolov7','weights','yolov7.pt'),
42
+ '37': os.path.join('yolov7','weights','yolov7.onnx'),
43
+ '38': os.path.join('yolov7','weights','yolov7x.pt'),
44
+ '39': os.path.join('yolov7','weights','yolov7x.onnx'),
45
+ '40': os.path.join('yolov7','weights','yolov7-w6.pt'),
46
+ '41': os.path.join('yolov7','weights','yolov7-w6.onnx'),
47
+ '42': os.path.join('yolov7','weights','yolov7-e6.pt'),
48
+ '43': os.path.join('yolov7','weights','yolov7-e6.onnx'),
49
+ '44': os.path.join('yolov7','weights','yolov7-d6.pt'),
50
+ '45': os.path.join('yolov7','weights','yolov7-d6.onnx'),
51
+ '46': os.path.join('yolov7','weights','yolov7-e6e.pt'),
52
+ '47': os.path.join('yolov7','weights','yolov7-e6e.onnx'),
53
+ # YOLOR
54
+ '48': os.path.join('yolor','weights','yolor_csp_x.pt'),
55
+ '49': os.path.join('yolor','weights','yolor_csp_x.onnx'),
56
+ '50': os.path.join('yolor','weights','yolor_csp_x_star.pt'),
57
+ '51': os.path.join('yolor','weights','yolor_csp_x_star.onnx'),
58
+ '52': os.path.join('yolor','weights','yolor_csp_star.pt'),
59
+ '53': os.path.join('yolor','weights','yolor_csp_star.onnx'),
60
+ '54': os.path.join('yolor','weights','yolor_csp.pt'),
61
+ '55': os.path.join('yolor','weights','yolor_csp.onnx'),
62
+ '56': os.path.join('yolor','weights','yolor_p6.pt'),
63
+ '57': os.path.join('yolor','weights','yolor_p6.onnx'),
64
+ # YOLOX
65
+ '58': os.path.join('yolox','weights','yolox_l.pth'),
66
+ '59': os.path.join('yolox','weights','yolox_l.onnx'),
67
+ '60': os.path.join('yolox','weights','yolox_nano.pth'),
68
+ '61': os.path.join('yolox','weights','yolox_nano.onnx'),
69
+ '62': os.path.join('yolox','weights','yolox_tiny.pth'),
70
+ '63': os.path.join('yolox','weights','yolox_tiny.onnx'),
71
+ '64': os.path.join('yolox','weights','yolox_darknet.pth'),
72
+ '65': os.path.join('yolox','weights','yolox_darknet.onnx'),
73
+ '66': os.path.join('yolox','weights','yolox_s.pth'),
74
+ '67': os.path.join('yolox','weights','yolox_s.onnx'),
75
+ '68': os.path.join('yolox','weights','yolox_m.pth'),
76
+ '69': os.path.join('yolox','weights','yolox_m.onnx'),
77
+ '70': os.path.join('yolox','weights','yolox_x.pth'),
78
+ '71': os.path.join('yolox','weights','yolox_x.onnx'),
79
+ # YOLOv8
80
+ '72': os.path.join('yolov8','weights','yolov8n.pt'),
81
+ '73': os.path.join('yolov8','weights','yolov8n.onnx'),
82
+ '74': os.path.join('yolov8','weights','yolov8s.pt'),
83
+ '75': os.path.join('yolov8','weights','yolov8s.onnx'),
84
+ '76': os.path.join('yolov8','weights','yolov8m.pt'),
85
+ '77': os.path.join('yolov8','weights','yolov8m.onnx'),
86
+ '78': os.path.join('yolov8','weights','yolov8l.pt'),
87
+ '79': os.path.join('yolov8','weights','yolov8l.onnx'),
88
+ '80': os.path.join('yolov8','weights','yolov8x.pt'),
89
+ '81': os.path.join('yolov8','weights','yolov8x.onnx')
90
+
91
+
92
+
93
+ }
94
+
95
+ def get_weight_path(model_flag):
96
+ if model_flag in range(0, 20):
97
+ onnx = False if (model_flag % 2 == 0) else True
98
+ weight = weights[str(model_flag)]
99
+ elif model_flag in range(20, 34):
100
+ onnx = False if (model_flag % 2 == 0) else True
101
+ weight = weights[str(model_flag)]
102
+ elif model_flag in range(34, 48):
103
+ onnx = False if (model_flag % 2 == 0) else True
104
+ weight = weights[str(model_flag)]
105
+ elif model_flag in range(48, 58):
106
+ onnx = False if (model_flag % 2 == 0) else True
107
+ weight = weights[str(model_flag)]
108
+ elif model_flag in range(58, 72):
109
+ onnx = False if (model_flag % 2 == 0) else True
110
+ weight = weights[str(model_flag)]
111
+ elif model_flag in range(72, 82):
112
+ onnx = False if (model_flag % 2 == 0) else True
113
+ weight = weights[str(model_flag)]
114
+
115
+
116
+ return onnx, weight
117
+
asone/detectors/yolor/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .yolor_detector import YOLOrDetector
+ __all__ = ['YOLOrDetector']
asone/detectors/yolor/cfg/yolor_csp.cfg ADDED
@@ -0,0 +1,1376 @@
1
+ [net]
2
+ # Testing
3
+ #batch=1
4
+ #subdivisions=1
5
+ # Training
6
+ batch=64
7
+ subdivisions=8
8
+ width=512
9
+ height=512
10
+ channels=3
11
+ momentum=0.949
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.00261
19
+ burn_in=1000
20
+ max_batches = 500500
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ #cutmix=1
26
+ mosaic=1
27
+
28
+
29
+ # ============ Backbone ============ #
30
+
31
+ # Stem
32
+
33
+ # 0
34
+ [convolutional]
35
+ batch_normalize=1
36
+ filters=32
37
+ size=3
38
+ stride=1
39
+ pad=1
40
+ activation=silu
41
+
42
+ # P1
43
+
44
+ # Downsample
45
+
46
+ [convolutional]
47
+ batch_normalize=1
48
+ filters=64
49
+ size=3
50
+ stride=2
51
+ pad=1
52
+ activation=silu
53
+
54
+ # Residual Block
55
+
56
+ [convolutional]
57
+ batch_normalize=1
58
+ filters=32
59
+ size=1
60
+ stride=1
61
+ pad=1
62
+ activation=silu
63
+
64
+ [convolutional]
65
+ batch_normalize=1
66
+ filters=64
67
+ size=3
68
+ stride=1
69
+ pad=1
70
+ activation=silu
71
+
72
+ # 4 (previous+1+3k)
73
+ [shortcut]
74
+ from=-3
75
+ activation=linear
76
+
77
+ # P2
78
+
79
+ # Downsample
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=128
84
+ size=3
85
+ stride=2
86
+ pad=1
87
+ activation=silu
88
+
89
+ # Split
90
+
91
+ [convolutional]
92
+ batch_normalize=1
93
+ filters=64
94
+ size=1
95
+ stride=1
96
+ pad=1
97
+ activation=silu
98
+
99
+ [route]
100
+ layers = -2
101
+
102
+ [convolutional]
103
+ batch_normalize=1
104
+ filters=64
105
+ size=1
106
+ stride=1
107
+ pad=1
108
+ activation=silu
109
+
110
+ # Residual Block
111
+
112
+ [convolutional]
113
+ batch_normalize=1
114
+ filters=64
115
+ size=1
116
+ stride=1
117
+ pad=1
118
+ activation=silu
119
+
120
+ [convolutional]
121
+ batch_normalize=1
122
+ filters=64
123
+ size=3
124
+ stride=1
125
+ pad=1
126
+ activation=silu
127
+
128
+ [shortcut]
129
+ from=-3
130
+ activation=linear
131
+
132
+ [convolutional]
133
+ batch_normalize=1
134
+ filters=64
135
+ size=1
136
+ stride=1
137
+ pad=1
138
+ activation=silu
139
+
140
+ [convolutional]
141
+ batch_normalize=1
142
+ filters=64
143
+ size=3
144
+ stride=1
145
+ pad=1
146
+ activation=silu
147
+
148
+ [shortcut]
149
+ from=-3
150
+ activation=linear
151
+
152
+ # Transition first
153
+
154
+ [convolutional]
155
+ batch_normalize=1
156
+ filters=64
157
+ size=1
158
+ stride=1
159
+ pad=1
160
+ activation=silu
161
+
162
+ # Merge [-1, -(3k+4)]
163
+
164
+ [route]
165
+ layers = -1,-10
166
+
167
+ # Transition last
168
+
169
+ # 17 (previous+7+3k)
170
+ [convolutional]
171
+ batch_normalize=1
172
+ filters=128
173
+ size=1
174
+ stride=1
175
+ pad=1
176
+ activation=silu
177
+
178
+ # P3
179
+
180
+ # Downsample
181
+
182
+ [convolutional]
183
+ batch_normalize=1
184
+ filters=256
185
+ size=3
186
+ stride=2
187
+ pad=1
188
+ activation=silu
189
+
190
+ # Split
191
+
192
+ [convolutional]
193
+ batch_normalize=1
194
+ filters=128
195
+ size=1
196
+ stride=1
197
+ pad=1
198
+ activation=silu
199
+
200
+ [route]
201
+ layers = -2
202
+
203
+ [convolutional]
204
+ batch_normalize=1
205
+ filters=128
206
+ size=1
207
+ stride=1
208
+ pad=1
209
+ activation=silu
210
+
211
+ # Residual Block
212
+
213
+ [convolutional]
214
+ batch_normalize=1
215
+ filters=128
216
+ size=1
217
+ stride=1
218
+ pad=1
219
+ activation=silu
220
+
221
+ [convolutional]
222
+ batch_normalize=1
223
+ filters=128
224
+ size=3
225
+ stride=1
226
+ pad=1
227
+ activation=silu
228
+
229
+ [shortcut]
230
+ from=-3
231
+ activation=linear
232
+
233
+ [convolutional]
234
+ batch_normalize=1
235
+ filters=128
236
+ size=1
237
+ stride=1
238
+ pad=1
239
+ activation=silu
240
+
241
+ [convolutional]
242
+ batch_normalize=1
243
+ filters=128
244
+ size=3
245
+ stride=1
246
+ pad=1
247
+ activation=silu
248
+
249
+ [shortcut]
250
+ from=-3
251
+ activation=linear
252
+
253
+ [convolutional]
254
+ batch_normalize=1
255
+ filters=128
256
+ size=1
257
+ stride=1
258
+ pad=1
259
+ activation=silu
260
+
261
+ [convolutional]
262
+ batch_normalize=1
263
+ filters=128
264
+ size=3
265
+ stride=1
266
+ pad=1
267
+ activation=silu
268
+
269
+ [shortcut]
270
+ from=-3
271
+ activation=linear
272
+
273
+ [convolutional]
274
+ batch_normalize=1
275
+ filters=128
276
+ size=1
277
+ stride=1
278
+ pad=1
279
+ activation=silu
280
+
281
+ [convolutional]
282
+ batch_normalize=1
283
+ filters=128
284
+ size=3
285
+ stride=1
286
+ pad=1
287
+ activation=silu
288
+
289
+ [shortcut]
290
+ from=-3
291
+ activation=linear
292
+
293
+ [convolutional]
294
+ batch_normalize=1
295
+ filters=128
296
+ size=1
297
+ stride=1
298
+ pad=1
299
+ activation=silu
300
+
301
+ [convolutional]
302
+ batch_normalize=1
303
+ filters=128
304
+ size=3
305
+ stride=1
306
+ pad=1
307
+ activation=silu
308
+
309
+ [shortcut]
310
+ from=-3
311
+ activation=linear
312
+
313
+ [convolutional]
314
+ batch_normalize=1
315
+ filters=128
316
+ size=1
317
+ stride=1
318
+ pad=1
319
+ activation=silu
320
+
321
+ [convolutional]
322
+ batch_normalize=1
323
+ filters=128
324
+ size=3
325
+ stride=1
326
+ pad=1
327
+ activation=silu
328
+
329
+ [shortcut]
330
+ from=-3
331
+ activation=linear
332
+
333
+ [convolutional]
334
+ batch_normalize=1
335
+ filters=128
336
+ size=1
337
+ stride=1
338
+ pad=1
339
+ activation=silu
340
+
341
+ [convolutional]
342
+ batch_normalize=1
343
+ filters=128
344
+ size=3
345
+ stride=1
346
+ pad=1
347
+ activation=silu
348
+
349
+ [shortcut]
350
+ from=-3
351
+ activation=linear
352
+
353
+ [convolutional]
354
+ batch_normalize=1
355
+ filters=128
356
+ size=1
357
+ stride=1
358
+ pad=1
359
+ activation=silu
360
+
361
+ [convolutional]
362
+ batch_normalize=1
363
+ filters=128
364
+ size=3
365
+ stride=1
366
+ pad=1
367
+ activation=silu
368
+
369
+ [shortcut]
370
+ from=-3
371
+ activation=linear
372
+
373
+ # Transition first
374
+
375
+ [convolutional]
376
+ batch_normalize=1
377
+ filters=128
378
+ size=1
379
+ stride=1
380
+ pad=1
381
+ activation=silu
382
+
383
+ # Merge [-1 -(4+3k)]
384
+
385
+ [route]
386
+ layers = -1,-28
387
+
388
+ # Transition last
389
+
390
+ # 48 (previous+7+3k)
391
+ [convolutional]
392
+ batch_normalize=1
393
+ filters=256
394
+ size=1
395
+ stride=1
396
+ pad=1
397
+ activation=silu
398
+
399
+ # P4
400
+
401
+ # Downsample
402
+
403
+ [convolutional]
404
+ batch_normalize=1
405
+ filters=512
406
+ size=3
407
+ stride=2
408
+ pad=1
409
+ activation=silu
410
+
411
+ # Split
412
+
413
+ [convolutional]
414
+ batch_normalize=1
415
+ filters=256
416
+ size=1
417
+ stride=1
418
+ pad=1
419
+ activation=silu
420
+
421
+ [route]
422
+ layers = -2
423
+
424
+ [convolutional]
425
+ batch_normalize=1
426
+ filters=256
427
+ size=1
428
+ stride=1
429
+ pad=1
430
+ activation=silu
431
+
432
+ # Residual Block
433
+
434
+ [convolutional]
435
+ batch_normalize=1
436
+ filters=256
437
+ size=1
438
+ stride=1
439
+ pad=1
440
+ activation=silu
441
+
442
+ [convolutional]
443
+ batch_normalize=1
444
+ filters=256
445
+ size=3
446
+ stride=1
447
+ pad=1
448
+ activation=silu
449
+
450
+ [shortcut]
451
+ from=-3
452
+ activation=linear
453
+
454
+ [convolutional]
455
+ batch_normalize=1
456
+ filters=256
457
+ size=1
458
+ stride=1
459
+ pad=1
460
+ activation=silu
461
+
462
+ [convolutional]
463
+ batch_normalize=1
464
+ filters=256
465
+ size=3
466
+ stride=1
467
+ pad=1
468
+ activation=silu
469
+
470
+ [shortcut]
471
+ from=-3
472
+ activation=linear
473
+
474
+ [convolutional]
475
+ batch_normalize=1
476
+ filters=256
477
+ size=1
478
+ stride=1
479
+ pad=1
480
+ activation=silu
481
+
482
+ [convolutional]
483
+ batch_normalize=1
484
+ filters=256
485
+ size=3
486
+ stride=1
487
+ pad=1
488
+ activation=silu
489
+
490
+ [shortcut]
491
+ from=-3
492
+ activation=linear
493
+
494
+ [convolutional]
495
+ batch_normalize=1
496
+ filters=256
497
+ size=1
498
+ stride=1
499
+ pad=1
500
+ activation=silu
501
+
502
+ [convolutional]
503
+ batch_normalize=1
504
+ filters=256
505
+ size=3
506
+ stride=1
507
+ pad=1
508
+ activation=silu
509
+
510
+ [shortcut]
511
+ from=-3
512
+ activation=linear
513
+
514
+ [convolutional]
515
+ batch_normalize=1
516
+ filters=256
517
+ size=1
518
+ stride=1
519
+ pad=1
520
+ activation=silu
521
+
522
+ [convolutional]
523
+ batch_normalize=1
524
+ filters=256
525
+ size=3
526
+ stride=1
527
+ pad=1
528
+ activation=silu
529
+
530
+ [shortcut]
531
+ from=-3
532
+ activation=linear
533
+
534
+ [convolutional]
535
+ batch_normalize=1
536
+ filters=256
537
+ size=1
538
+ stride=1
539
+ pad=1
540
+ activation=silu
541
+
542
+ [convolutional]
543
+ batch_normalize=1
544
+ filters=256
545
+ size=3
546
+ stride=1
547
+ pad=1
548
+ activation=silu
549
+
550
+ [shortcut]
551
+ from=-3
552
+ activation=linear
553
+
554
+ [convolutional]
555
+ batch_normalize=1
556
+ filters=256
557
+ size=1
558
+ stride=1
559
+ pad=1
560
+ activation=silu
561
+
562
+ [convolutional]
563
+ batch_normalize=1
564
+ filters=256
565
+ size=3
566
+ stride=1
567
+ pad=1
568
+ activation=silu
569
+
570
+ [shortcut]
571
+ from=-3
572
+ activation=linear
573
+
574
+ [convolutional]
575
+ batch_normalize=1
576
+ filters=256
577
+ size=1
578
+ stride=1
579
+ pad=1
580
+ activation=silu
581
+
582
+ [convolutional]
583
+ batch_normalize=1
584
+ filters=256
585
+ size=3
586
+ stride=1
587
+ pad=1
588
+ activation=silu
589
+
590
+ [shortcut]
591
+ from=-3
592
+ activation=linear
593
+
594
+ # Transition first
595
+
596
+ [convolutional]
597
+ batch_normalize=1
598
+ filters=256
599
+ size=1
600
+ stride=1
601
+ pad=1
602
+ activation=silu
603
+
604
+ # Merge [-1 -(3k+4)]
605
+
606
+ [route]
607
+ layers = -1,-28
608
+
609
+ # Transition last
610
+
611
+ # 79 (previous+7+3k)
612
+ [convolutional]
613
+ batch_normalize=1
614
+ filters=512
615
+ size=1
616
+ stride=1
617
+ pad=1
618
+ activation=silu
619
+
620
+ # P5
621
+
622
+ # Downsample
623
+
624
+ [convolutional]
625
+ batch_normalize=1
626
+ filters=1024
627
+ size=3
628
+ stride=2
629
+ pad=1
630
+ activation=silu
631
+
632
+ # Split
633
+
634
+ [convolutional]
635
+ batch_normalize=1
636
+ filters=512
637
+ size=1
638
+ stride=1
639
+ pad=1
640
+ activation=silu
641
+
642
+ [route]
643
+ layers = -2
644
+
645
+ [convolutional]
646
+ batch_normalize=1
647
+ filters=512
648
+ size=1
649
+ stride=1
650
+ pad=1
651
+ activation=silu
652
+
653
+ # Residual Block
654
+
655
+ [convolutional]
656
+ batch_normalize=1
657
+ filters=512
658
+ size=1
659
+ stride=1
660
+ pad=1
661
+ activation=silu
662
+
663
+ [convolutional]
664
+ batch_normalize=1
665
+ filters=512
666
+ size=3
667
+ stride=1
668
+ pad=1
669
+ activation=silu
670
+
671
+ [shortcut]
672
+ from=-3
673
+ activation=linear
674
+
675
+ [convolutional]
676
+ batch_normalize=1
677
+ filters=512
678
+ size=1
679
+ stride=1
680
+ pad=1
681
+ activation=silu
682
+
683
+ [convolutional]
684
+ batch_normalize=1
685
+ filters=512
686
+ size=3
687
+ stride=1
688
+ pad=1
689
+ activation=silu
690
+
691
+ [shortcut]
692
+ from=-3
693
+ activation=linear
694
+
695
+ [convolutional]
696
+ batch_normalize=1
697
+ filters=512
698
+ size=1
699
+ stride=1
700
+ pad=1
701
+ activation=silu
702
+
703
+ [convolutional]
704
+ batch_normalize=1
705
+ filters=512
706
+ size=3
707
+ stride=1
708
+ pad=1
709
+ activation=silu
710
+
711
+ [shortcut]
712
+ from=-3
713
+ activation=linear
714
+
715
+ [convolutional]
716
+ batch_normalize=1
717
+ filters=512
718
+ size=1
719
+ stride=1
720
+ pad=1
721
+ activation=silu
722
+
723
+ [convolutional]
724
+ batch_normalize=1
725
+ filters=512
726
+ size=3
727
+ stride=1
728
+ pad=1
729
+ activation=silu
730
+
731
+ [shortcut]
732
+ from=-3
733
+ activation=linear
734
+
735
+ # Transition first
736
+
737
+ [convolutional]
738
+ batch_normalize=1
739
+ filters=512
740
+ size=1
741
+ stride=1
742
+ pad=1
743
+ activation=silu
744
+
745
+ # Merge [-1 -(3k+4)]
746
+
747
+ [route]
748
+ layers = -1,-16
749
+
750
+ # Transition last
751
+
752
+ # 98 (previous+7+3k)
753
+ [convolutional]
754
+ batch_normalize=1
755
+ filters=1024
756
+ size=1
757
+ stride=1
758
+ pad=1
759
+ activation=silu
760
+
761
+ # ============ End of Backbone ============ #
762
+
763
+ # ============ Neck ============ #
764
+
765
+ # CSPSPP
766
+
767
+ [convolutional]
768
+ batch_normalize=1
769
+ filters=512
770
+ size=1
771
+ stride=1
772
+ pad=1
773
+ activation=silu
774
+
775
+ [route]
776
+ layers = -2
777
+
778
+ [convolutional]
779
+ batch_normalize=1
780
+ filters=512
781
+ size=1
782
+ stride=1
783
+ pad=1
784
+ activation=silu
785
+
786
+ [convolutional]
787
+ batch_normalize=1
788
+ size=3
789
+ stride=1
790
+ pad=1
791
+ filters=512
792
+ activation=silu
793
+
794
+ [convolutional]
795
+ batch_normalize=1
796
+ filters=512
797
+ size=1
798
+ stride=1
799
+ pad=1
800
+ activation=silu
801
+
802
+ ### SPP ###
803
+ [maxpool]
804
+ stride=1
805
+ size=5
806
+
807
+ [route]
808
+ layers=-2
809
+
810
+ [maxpool]
811
+ stride=1
812
+ size=9
813
+
814
+ [route]
815
+ layers=-4
816
+
817
+ [maxpool]
818
+ stride=1
819
+ size=13
820
+
821
+ [route]
822
+ layers=-1,-3,-5,-6
823
+ ### End SPP ###
824
+
825
+ [convolutional]
826
+ batch_normalize=1
827
+ filters=512
828
+ size=1
829
+ stride=1
830
+ pad=1
831
+ activation=silu
832
+
833
+ [convolutional]
834
+ batch_normalize=1
835
+ size=3
836
+ stride=1
837
+ pad=1
838
+ filters=512
839
+ activation=silu
840
+
841
+ [route]
842
+ layers = -1, -13
843
+
844
+ # 113 (previous+6+5+2k)
845
+ [convolutional]
846
+ batch_normalize=1
847
+ filters=512
848
+ size=1
849
+ stride=1
850
+ pad=1
851
+ activation=silu
852
+
853
+ # End of CSPSPP
854
+
855
+
856
+ # FPN-4
857
+
858
+ [convolutional]
859
+ batch_normalize=1
860
+ filters=256
861
+ size=1
862
+ stride=1
863
+ pad=1
864
+ activation=silu
865
+
866
+ [upsample]
867
+ stride=2
868
+
869
+ [route]
870
+ layers = 79
871
+
872
+ [convolutional]
873
+ batch_normalize=1
874
+ filters=256
875
+ size=1
876
+ stride=1
877
+ pad=1
878
+ activation=silu
879
+
880
+ [route]
881
+ layers = -1, -3
882
+
883
+ [convolutional]
884
+ batch_normalize=1
885
+ filters=256
886
+ size=1
887
+ stride=1
888
+ pad=1
889
+ activation=silu
890
+
891
+ # Split
892
+
893
+ [convolutional]
894
+ batch_normalize=1
895
+ filters=256
896
+ size=1
897
+ stride=1
898
+ pad=1
899
+ activation=silu
900
+
901
+ [route]
902
+ layers = -2
903
+
904
+ # Plain Block
905
+
906
+ [convolutional]
907
+ batch_normalize=1
908
+ filters=256
909
+ size=1
910
+ stride=1
911
+ pad=1
912
+ activation=silu
913
+
914
+ [convolutional]
915
+ batch_normalize=1
916
+ size=3
917
+ stride=1
918
+ pad=1
919
+ filters=256
920
+ activation=silu
921
+
922
+ [convolutional]
923
+ batch_normalize=1
924
+ filters=256
925
+ size=1
926
+ stride=1
927
+ pad=1
928
+ activation=silu
929
+
930
+ [convolutional]
931
+ batch_normalize=1
932
+ size=3
933
+ stride=1
934
+ pad=1
935
+ filters=256
936
+ activation=silu
937
+
938
+ # Merge [-1, -(2k+2)]
939
+
940
+ [route]
941
+ layers = -1, -6
942
+
943
+ # Transition last
944
+
945
+ # 127 (previous+6+4+2k)
946
+ [convolutional]
947
+ batch_normalize=1
948
+ filters=256
949
+ size=1
950
+ stride=1
951
+ pad=1
952
+ activation=silu
953
+
954
+
955
+ # FPN-3
956
+
957
+ [convolutional]
958
+ batch_normalize=1
959
+ filters=128
960
+ size=1
961
+ stride=1
962
+ pad=1
963
+ activation=silu
964
+
965
+ [upsample]
966
+ stride=2
967
+
968
+ [route]
969
+ layers = 48
970
+
971
+ [convolutional]
972
+ batch_normalize=1
973
+ filters=128
974
+ size=1
975
+ stride=1
976
+ pad=1
977
+ activation=silu
978
+
979
+ [route]
980
+ layers = -1, -3
981
+
982
+ [convolutional]
983
+ batch_normalize=1
984
+ filters=128
985
+ size=1
986
+ stride=1
987
+ pad=1
988
+ activation=silu
989
+
990
+ # Split
991
+
992
+ [convolutional]
993
+ batch_normalize=1
994
+ filters=128
995
+ size=1
996
+ stride=1
997
+ pad=1
998
+ activation=silu
999
+
1000
+ [route]
1001
+ layers = -2
1002
+
1003
+ # Plain Block
1004
+
1005
+ [convolutional]
1006
+ batch_normalize=1
1007
+ filters=128
1008
+ size=1
1009
+ stride=1
1010
+ pad=1
1011
+ activation=silu
1012
+
1013
+ [convolutional]
1014
+ batch_normalize=1
1015
+ size=3
1016
+ stride=1
1017
+ pad=1
1018
+ filters=128
1019
+ activation=silu
1020
+
1021
+ [convolutional]
1022
+ batch_normalize=1
1023
+ filters=128
1024
+ size=1
1025
+ stride=1
1026
+ pad=1
1027
+ activation=silu
1028
+
1029
+ [convolutional]
1030
+ batch_normalize=1
1031
+ size=3
1032
+ stride=1
1033
+ pad=1
1034
+ filters=128
1035
+ activation=silu
1036
+
1037
+ # Merge [-1, -(2k+2)]
1038
+
1039
+ [route]
1040
+ layers = -1, -6
1041
+
1042
+ # Transition last
1043
+
1044
+ # 141 (previous+6+4+2k)
1045
+ [convolutional]
1046
+ batch_normalize=1
1047
+ filters=128
1048
+ size=1
1049
+ stride=1
1050
+ pad=1
1051
+ activation=silu
1052
+
1053
+
1054
+ # PAN-4
1055
+
1056
+ [convolutional]
1057
+ batch_normalize=1
1058
+ size=3
1059
+ stride=2
1060
+ pad=1
1061
+ filters=256
1062
+ activation=silu
1063
+
1064
+ [route]
1065
+ layers = -1, 127
1066
+
1067
+ [convolutional]
1068
+ batch_normalize=1
1069
+ filters=256
1070
+ size=1
1071
+ stride=1
1072
+ pad=1
1073
+ activation=silu
1074
+
1075
+ # Split
1076
+
1077
+ [convolutional]
1078
+ batch_normalize=1
1079
+ filters=256
1080
+ size=1
1081
+ stride=1
1082
+ pad=1
1083
+ activation=silu
1084
+
1085
+ [route]
1086
+ layers = -2
1087
+
1088
+ # Plain Block
1089
+
1090
+ [convolutional]
1091
+ batch_normalize=1
1092
+ filters=256
1093
+ size=1
1094
+ stride=1
1095
+ pad=1
1096
+ activation=silu
1097
+
1098
+ [convolutional]
1099
+ batch_normalize=1
1100
+ size=3
1101
+ stride=1
1102
+ pad=1
1103
+ filters=256
1104
+ activation=silu
1105
+
1106
+ [convolutional]
1107
+ batch_normalize=1
1108
+ filters=256
1109
+ size=1
1110
+ stride=1
1111
+ pad=1
1112
+ activation=silu
1113
+
1114
+ [convolutional]
1115
+ batch_normalize=1
1116
+ size=3
1117
+ stride=1
1118
+ pad=1
1119
+ filters=256
1120
+ activation=silu
1121
+
1122
+ [route]
1123
+ layers = -1,-6
1124
+
1125
+ # Transition last
1126
+
1127
+ # 152 (previous+3+4+2k)
1128
+ [convolutional]
1129
+ batch_normalize=1
1130
+ filters=256
1131
+ size=1
1132
+ stride=1
1133
+ pad=1
1134
+ activation=silu
1135
+
1136
+
1137
+ # PAN-5
1138
+
1139
+ [convolutional]
1140
+ batch_normalize=1
1141
+ size=3
1142
+ stride=2
1143
+ pad=1
1144
+ filters=512
1145
+ activation=silu
1146
+
1147
+ [route]
1148
+ layers = -1, 113
1149
+
1150
+ [convolutional]
1151
+ batch_normalize=1
1152
+ filters=512
1153
+ size=1
1154
+ stride=1
1155
+ pad=1
1156
+ activation=silu
1157
+
1158
+ # Split
1159
+
1160
+ [convolutional]
1161
+ batch_normalize=1
1162
+ filters=512
1163
+ size=1
1164
+ stride=1
1165
+ pad=1
1166
+ activation=silu
1167
+
1168
+ [route]
1169
+ layers = -2
1170
+
1171
+ # Plain Block
1172
+
1173
+ [convolutional]
1174
+ batch_normalize=1
1175
+ filters=512
1176
+ size=1
1177
+ stride=1
1178
+ pad=1
1179
+ activation=silu
1180
+
1181
+ [convolutional]
1182
+ batch_normalize=1
1183
+ size=3
1184
+ stride=1
1185
+ pad=1
1186
+ filters=512
1187
+ activation=silu
1188
+
1189
+ [convolutional]
1190
+ batch_normalize=1
1191
+ filters=512
1192
+ size=1
1193
+ stride=1
1194
+ pad=1
1195
+ activation=silu
1196
+
1197
+ [convolutional]
1198
+ batch_normalize=1
1199
+ size=3
1200
+ stride=1
1201
+ pad=1
1202
+ filters=512
1203
+ activation=silu
1204
+
1205
+ [route]
1206
+ layers = -1,-6
1207
+
1208
+ # Transition last
1209
+
1210
+ # 163 (previous+3+4+2k)
1211
+ [convolutional]
1212
+ batch_normalize=1
1213
+ filters=512
1214
+ size=1
1215
+ stride=1
1216
+ pad=1
1217
+ activation=silu
1218
+
1219
+ # ============ End of Neck ============ #
1220
+
1221
+ # 164
1222
+ [implicit_add]
1223
+ filters=256
1224
+
1225
+ # 165
1226
+ [implicit_add]
1227
+ filters=512
1228
+
1229
+ # 166
1230
+ [implicit_add]
1231
+ filters=1024
1232
+
1233
+ # 167
1234
+ [implicit_mul]
1235
+ filters=255
1236
+
1237
+ # 168
1238
+ [implicit_mul]
1239
+ filters=255
1240
+
1241
+ # 169
1242
+ [implicit_mul]
1243
+ filters=255
1244
+
1245
+ # ============ Head ============ #
1246
+
1247
+ # YOLO-3
1248
+
1249
+ [route]
1250
+ layers = 141
1251
+
1252
+ [convolutional]
1253
+ batch_normalize=1
1254
+ size=3
1255
+ stride=1
1256
+ pad=1
1257
+ filters=256
1258
+ activation=silu
1259
+
1260
+ [shift_channels]
1261
+ from=164
1262
+
1263
+ [convolutional]
1264
+ size=1
1265
+ stride=1
1266
+ pad=1
1267
+ filters=255
1268
+ activation=linear
1269
+
1270
+ [control_channels]
1271
+ from=167
1272
+
1273
+ [yolo]
1274
+ mask = 0,1,2
1275
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1276
+ classes=80
1277
+ num=9
1278
+ jitter=.3
1279
+ ignore_thresh = .7
1280
+ truth_thresh = 1
1281
+ random=1
1282
+ scale_x_y = 1.05
1283
+ iou_thresh=0.213
1284
+ cls_normalizer=1.0
1285
+ iou_normalizer=0.07
1286
+ iou_loss=ciou
1287
+ nms_kind=greedynms
1288
+ beta_nms=0.6
1289
+
1290
+
1291
+ # YOLO-4
1292
+
1293
+ [route]
1294
+ layers = 152
1295
+
1296
+ [convolutional]
1297
+ batch_normalize=1
1298
+ size=3
1299
+ stride=1
1300
+ pad=1
1301
+ filters=512
1302
+ activation=silu
1303
+
1304
+ [shift_channels]
1305
+ from=165
1306
+
1307
+ [convolutional]
1308
+ size=1
1309
+ stride=1
1310
+ pad=1
1311
+ filters=255
1312
+ activation=linear
1313
+
1314
+ [control_channels]
1315
+ from=168
1316
+
1317
+ [yolo]
1318
+ mask = 3,4,5
1319
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1320
+ classes=80
1321
+ num=9
1322
+ jitter=.3
1323
+ ignore_thresh = .7
1324
+ truth_thresh = 1
1325
+ random=1
1326
+ scale_x_y = 1.05
1327
+ iou_thresh=0.213
1328
+ cls_normalizer=1.0
1329
+ iou_normalizer=0.07
1330
+ iou_loss=ciou
1331
+ nms_kind=greedynms
1332
+ beta_nms=0.6
1333
+
1334
+
1335
+ # YOLO-5
1336
+
1337
+ [route]
1338
+ layers = 163
1339
+
1340
+ [convolutional]
1341
+ batch_normalize=1
1342
+ size=3
1343
+ stride=1
1344
+ pad=1
1345
+ filters=1024
1346
+ activation=silu
1347
+
1348
+ [shift_channels]
1349
+ from=166
1350
+
1351
+ [convolutional]
1352
+ size=1
1353
+ stride=1
1354
+ pad=1
1355
+ filters=255
1356
+ activation=linear
1357
+
1358
+ [control_channels]
1359
+ from=169
1360
+
1361
+ [yolo]
1362
+ mask = 6,7,8
1363
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1364
+ classes=80
1365
+ num=9
1366
+ jitter=.3
1367
+ ignore_thresh = .7
1368
+ truth_thresh = 1
1369
+ random=1
1370
+ scale_x_y = 1.05
1371
+ iou_thresh=0.213
1372
+ cls_normalizer=1.0
1373
+ iou_normalizer=0.07
1374
+ iou_loss=ciou
1375
+ nms_kind=greedynms
1376
+ beta_nms=0.6
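
The file added above (yolor_csp.cfg) follows the usual darknet layout: `[section]` headers, `key=value` lines, and `#` comments. The repository has its own loader for these files; as a minimal, hypothetical sketch only (file path and function name are illustrative, not the repo's API), the format can be read into a list of per-layer dictionaries like this:

```python
# Hypothetical, minimal sketch (not the repository's own loader): parse a
# darknet-style .cfg like the one added above into a list of key/value blocks.
from typing import Dict, List


def parse_darknet_cfg(path: str) -> List[Dict[str, str]]:
    blocks: List[Dict[str, str]] = []
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith("#"):
                continue                                  # skip blanks and comments
            if line.startswith("[") and line.endswith("]"):
                blocks.append({"type": line[1:-1]})       # new section, e.g. [convolutional]
            else:
                key, _, value = line.partition("=")
                blocks[-1][key.strip()] = value.strip()
    return blocks


if __name__ == "__main__":
    cfg = parse_darknet_cfg("asone/detectors/yolor/cfg/yolor_csp.cfg")
    print(cfg[0]["type"], len(cfg) - 1, "layer blocks")   # '[net]' block plus layer blocks
```
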
asone/detectors/yolor/cfg/yolor_csp_x.cfg ADDED
@@ -0,0 +1,1576 @@
1
+ [net]
2
+ # Testing
3
+ #batch=1
4
+ #subdivisions=1
5
+ # Training
6
+ batch=64
7
+ subdivisions=8
8
+ width=512
9
+ height=512
10
+ channels=3
11
+ momentum=0.949
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.00261
19
+ burn_in=1000
20
+ max_batches = 500500
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ #cutmix=1
26
+ mosaic=1
27
+
28
+
29
+ # ============ Backbone ============ #
30
+
31
+ # Stem
32
+
33
+ # 0
34
+ [convolutional]
35
+ batch_normalize=1
36
+ filters=32
37
+ size=3
38
+ stride=1
39
+ pad=1
40
+ activation=silu
41
+
42
+ # P1
43
+
44
+ # Downsample
45
+
46
+ [convolutional]
47
+ batch_normalize=1
48
+ filters=80
49
+ size=3
50
+ stride=2
51
+ pad=1
52
+ activation=silu
53
+
54
+ # Residual Block
55
+
56
+ [convolutional]
57
+ batch_normalize=1
58
+ filters=40
59
+ size=1
60
+ stride=1
61
+ pad=1
62
+ activation=silu
63
+
64
+ [convolutional]
65
+ batch_normalize=1
66
+ filters=80
67
+ size=3
68
+ stride=1
69
+ pad=1
70
+ activation=silu
71
+
72
+ # 4 (previous+1+3k)
73
+ [shortcut]
74
+ from=-3
75
+ activation=linear
76
+
77
+ # P2
78
+
79
+ # Downsample
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=160
84
+ size=3
85
+ stride=2
86
+ pad=1
87
+ activation=silu
88
+
89
+ # Split
90
+
91
+ [convolutional]
92
+ batch_normalize=1
93
+ filters=80
94
+ size=1
95
+ stride=1
96
+ pad=1
97
+ activation=silu
98
+
99
+ [route]
100
+ layers = -2
101
+
102
+ [convolutional]
103
+ batch_normalize=1
104
+ filters=80
105
+ size=1
106
+ stride=1
107
+ pad=1
108
+ activation=silu
109
+
110
+ # Residual Block
111
+
112
+ [convolutional]
113
+ batch_normalize=1
114
+ filters=80
115
+ size=1
116
+ stride=1
117
+ pad=1
118
+ activation=silu
119
+
120
+ [convolutional]
121
+ batch_normalize=1
122
+ filters=80
123
+ size=3
124
+ stride=1
125
+ pad=1
126
+ activation=silu
127
+
128
+ [shortcut]
129
+ from=-3
130
+ activation=linear
131
+
132
+ [convolutional]
133
+ batch_normalize=1
134
+ filters=80
135
+ size=1
136
+ stride=1
137
+ pad=1
138
+ activation=silu
139
+
140
+ [convolutional]
141
+ batch_normalize=1
142
+ filters=80
143
+ size=3
144
+ stride=1
145
+ pad=1
146
+ activation=silu
147
+
148
+ [shortcut]
149
+ from=-3
150
+ activation=linear
151
+
152
+ [convolutional]
153
+ batch_normalize=1
154
+ filters=80
155
+ size=1
156
+ stride=1
157
+ pad=1
158
+ activation=silu
159
+
160
+ [convolutional]
161
+ batch_normalize=1
162
+ filters=80
163
+ size=3
164
+ stride=1
165
+ pad=1
166
+ activation=silu
167
+
168
+ [shortcut]
169
+ from=-3
170
+ activation=linear
171
+
172
+ # Transition first
173
+
174
+ [convolutional]
175
+ batch_normalize=1
176
+ filters=80
177
+ size=1
178
+ stride=1
179
+ pad=1
180
+ activation=silu
181
+
182
+ # Merge [-1, -(3k+4)]
183
+
184
+ [route]
185
+ layers = -1,-13
186
+
187
+ # Transition last
188
+
189
+ # 20 (previous+7+3k)
190
+ [convolutional]
191
+ batch_normalize=1
192
+ filters=160
193
+ size=1
194
+ stride=1
195
+ pad=1
196
+ activation=silu
197
+
198
+ # P3
199
+
200
+ # Downsample
201
+
202
+ [convolutional]
203
+ batch_normalize=1
204
+ filters=320
205
+ size=3
206
+ stride=2
207
+ pad=1
208
+ activation=silu
209
+
210
+ # Split
211
+
212
+ [convolutional]
213
+ batch_normalize=1
214
+ filters=160
215
+ size=1
216
+ stride=1
217
+ pad=1
218
+ activation=silu
219
+
220
+ [route]
221
+ layers = -2
222
+
223
+ [convolutional]
224
+ batch_normalize=1
225
+ filters=160
226
+ size=1
227
+ stride=1
228
+ pad=1
229
+ activation=silu
230
+
231
+ # Residual Block
232
+
233
+ [convolutional]
234
+ batch_normalize=1
235
+ filters=160
236
+ size=1
237
+ stride=1
238
+ pad=1
239
+ activation=silu
240
+
241
+ [convolutional]
242
+ batch_normalize=1
243
+ filters=160
244
+ size=3
245
+ stride=1
246
+ pad=1
247
+ activation=silu
248
+
249
+ [shortcut]
250
+ from=-3
251
+ activation=linear
252
+
253
+ [convolutional]
254
+ batch_normalize=1
255
+ filters=160
256
+ size=1
257
+ stride=1
258
+ pad=1
259
+ activation=silu
260
+
261
+ [convolutional]
262
+ batch_normalize=1
263
+ filters=160
264
+ size=3
265
+ stride=1
266
+ pad=1
267
+ activation=silu
268
+
269
+ [shortcut]
270
+ from=-3
271
+ activation=linear
272
+
273
+ [convolutional]
274
+ batch_normalize=1
275
+ filters=160
276
+ size=1
277
+ stride=1
278
+ pad=1
279
+ activation=silu
280
+
281
+ [convolutional]
282
+ batch_normalize=1
283
+ filters=160
284
+ size=3
285
+ stride=1
286
+ pad=1
287
+ activation=silu
288
+
289
+ [shortcut]
290
+ from=-3
291
+ activation=linear
292
+
293
+ [convolutional]
294
+ batch_normalize=1
295
+ filters=160
296
+ size=1
297
+ stride=1
298
+ pad=1
299
+ activation=silu
300
+
301
+ [convolutional]
302
+ batch_normalize=1
303
+ filters=160
304
+ size=3
305
+ stride=1
306
+ pad=1
307
+ activation=silu
308
+
309
+ [shortcut]
310
+ from=-3
311
+ activation=linear
312
+
313
+ [convolutional]
314
+ batch_normalize=1
315
+ filters=160
316
+ size=1
317
+ stride=1
318
+ pad=1
319
+ activation=silu
320
+
321
+ [convolutional]
322
+ batch_normalize=1
323
+ filters=160
324
+ size=3
325
+ stride=1
326
+ pad=1
327
+ activation=silu
328
+
329
+ [shortcut]
330
+ from=-3
331
+ activation=linear
332
+
333
+ [convolutional]
334
+ batch_normalize=1
335
+ filters=160
336
+ size=1
337
+ stride=1
338
+ pad=1
339
+ activation=silu
340
+
341
+ [convolutional]
342
+ batch_normalize=1
343
+ filters=160
344
+ size=3
345
+ stride=1
346
+ pad=1
347
+ activation=silu
348
+
349
+ [shortcut]
350
+ from=-3
351
+ activation=linear
352
+
353
+ [convolutional]
354
+ batch_normalize=1
355
+ filters=160
356
+ size=1
357
+ stride=1
358
+ pad=1
359
+ activation=silu
360
+
361
+ [convolutional]
362
+ batch_normalize=1
363
+ filters=160
364
+ size=3
365
+ stride=1
366
+ pad=1
367
+ activation=silu
368
+
369
+ [shortcut]
370
+ from=-3
371
+ activation=linear
372
+
373
+ [convolutional]
374
+ batch_normalize=1
375
+ filters=160
376
+ size=1
377
+ stride=1
378
+ pad=1
379
+ activation=silu
380
+
381
+ [convolutional]
382
+ batch_normalize=1
383
+ filters=160
384
+ size=3
385
+ stride=1
386
+ pad=1
387
+ activation=silu
388
+
389
+ [shortcut]
390
+ from=-3
391
+ activation=linear
392
+
393
+ [convolutional]
394
+ batch_normalize=1
395
+ filters=160
396
+ size=1
397
+ stride=1
398
+ pad=1
399
+ activation=silu
400
+
401
+ [convolutional]
402
+ batch_normalize=1
403
+ filters=160
404
+ size=3
405
+ stride=1
406
+ pad=1
407
+ activation=silu
408
+
409
+ [shortcut]
410
+ from=-3
411
+ activation=linear
412
+
413
+ [convolutional]
414
+ batch_normalize=1
415
+ filters=160
416
+ size=1
417
+ stride=1
418
+ pad=1
419
+ activation=silu
420
+
421
+ [convolutional]
422
+ batch_normalize=1
423
+ filters=160
424
+ size=3
425
+ stride=1
426
+ pad=1
427
+ activation=silu
428
+
429
+ [shortcut]
430
+ from=-3
431
+ activation=linear
432
+
433
+ # Transition first
434
+
435
+ [convolutional]
436
+ batch_normalize=1
437
+ filters=160
438
+ size=1
439
+ stride=1
440
+ pad=1
441
+ activation=silu
442
+
443
+ # Merge [-1 -(4+3k)]
444
+
445
+ [route]
446
+ layers = -1,-34
447
+
448
+ # Transition last
449
+
450
+ # 57 (previous+7+3k)
451
+ [convolutional]
452
+ batch_normalize=1
453
+ filters=320
454
+ size=1
455
+ stride=1
456
+ pad=1
457
+ activation=silu
458
+
459
+ # P4
460
+
461
+ # Downsample
462
+
463
+ [convolutional]
464
+ batch_normalize=1
465
+ filters=640
466
+ size=3
467
+ stride=2
468
+ pad=1
469
+ activation=silu
470
+
471
+ # Split
472
+
473
+ [convolutional]
474
+ batch_normalize=1
475
+ filters=320
476
+ size=1
477
+ stride=1
478
+ pad=1
479
+ activation=silu
480
+
481
+ [route]
482
+ layers = -2
483
+
484
+ [convolutional]
485
+ batch_normalize=1
486
+ filters=320
487
+ size=1
488
+ stride=1
489
+ pad=1
490
+ activation=silu
491
+
492
+ # Residual Block
493
+
494
+ [convolutional]
495
+ batch_normalize=1
496
+ filters=320
497
+ size=1
498
+ stride=1
499
+ pad=1
500
+ activation=silu
501
+
502
+ [convolutional]
503
+ batch_normalize=1
504
+ filters=320
505
+ size=3
506
+ stride=1
507
+ pad=1
508
+ activation=silu
509
+
510
+ [shortcut]
511
+ from=-3
512
+ activation=linear
513
+
514
+ [convolutional]
515
+ batch_normalize=1
516
+ filters=320
517
+ size=1
518
+ stride=1
519
+ pad=1
520
+ activation=silu
521
+
522
+ [convolutional]
523
+ batch_normalize=1
524
+ filters=320
525
+ size=3
526
+ stride=1
527
+ pad=1
528
+ activation=silu
529
+
530
+ [shortcut]
531
+ from=-3
532
+ activation=linear
533
+
534
+ [convolutional]
535
+ batch_normalize=1
536
+ filters=320
537
+ size=1
538
+ stride=1
539
+ pad=1
540
+ activation=silu
541
+
542
+ [convolutional]
543
+ batch_normalize=1
544
+ filters=320
545
+ size=3
546
+ stride=1
547
+ pad=1
548
+ activation=silu
549
+
550
+ [shortcut]
551
+ from=-3
552
+ activation=linear
553
+
554
+ [convolutional]
555
+ batch_normalize=1
556
+ filters=320
557
+ size=1
558
+ stride=1
559
+ pad=1
560
+ activation=silu
561
+
562
+ [convolutional]
563
+ batch_normalize=1
564
+ filters=320
565
+ size=3
566
+ stride=1
567
+ pad=1
568
+ activation=silu
569
+
570
+ [shortcut]
571
+ from=-3
572
+ activation=linear
573
+
574
+ [convolutional]
575
+ batch_normalize=1
576
+ filters=320
577
+ size=1
578
+ stride=1
579
+ pad=1
580
+ activation=silu
581
+
582
+ [convolutional]
583
+ batch_normalize=1
584
+ filters=320
585
+ size=3
586
+ stride=1
587
+ pad=1
588
+ activation=silu
589
+
590
+ [shortcut]
591
+ from=-3
592
+ activation=linear
593
+
594
+ [convolutional]
595
+ batch_normalize=1
596
+ filters=320
597
+ size=1
598
+ stride=1
599
+ pad=1
600
+ activation=silu
601
+
602
+ [convolutional]
603
+ batch_normalize=1
604
+ filters=320
605
+ size=3
606
+ stride=1
607
+ pad=1
608
+ activation=silu
609
+
610
+ [shortcut]
611
+ from=-3
612
+ activation=linear
613
+
614
+ [convolutional]
615
+ batch_normalize=1
616
+ filters=320
617
+ size=1
618
+ stride=1
619
+ pad=1
620
+ activation=silu
621
+
622
+ [convolutional]
623
+ batch_normalize=1
624
+ filters=320
625
+ size=3
626
+ stride=1
627
+ pad=1
628
+ activation=silu
629
+
630
+ [shortcut]
631
+ from=-3
632
+ activation=linear
633
+
634
+ [convolutional]
635
+ batch_normalize=1
636
+ filters=320
637
+ size=1
638
+ stride=1
639
+ pad=1
640
+ activation=silu
641
+
642
+ [convolutional]
643
+ batch_normalize=1
644
+ filters=320
645
+ size=3
646
+ stride=1
647
+ pad=1
648
+ activation=silu
649
+
650
+ [shortcut]
651
+ from=-3
652
+ activation=linear
653
+
654
+ [convolutional]
655
+ batch_normalize=1
656
+ filters=320
657
+ size=1
658
+ stride=1
659
+ pad=1
660
+ activation=silu
661
+
662
+ [convolutional]
663
+ batch_normalize=1
664
+ filters=320
665
+ size=3
666
+ stride=1
667
+ pad=1
668
+ activation=silu
669
+
670
+ [shortcut]
671
+ from=-3
672
+ activation=linear
673
+
674
+ [convolutional]
675
+ batch_normalize=1
676
+ filters=320
677
+ size=1
678
+ stride=1
679
+ pad=1
680
+ activation=silu
681
+
682
+ [convolutional]
683
+ batch_normalize=1
684
+ filters=320
685
+ size=3
686
+ stride=1
687
+ pad=1
688
+ activation=silu
689
+
690
+ [shortcut]
691
+ from=-3
692
+ activation=linear
693
+
694
+ # Transition first
695
+
696
+ [convolutional]
697
+ batch_normalize=1
698
+ filters=320
699
+ size=1
700
+ stride=1
701
+ pad=1
702
+ activation=silu
703
+
704
+ # Merge [-1 -(3k+4)]
705
+
706
+ [route]
707
+ layers = -1,-34
708
+
709
+ # Transition last
710
+
711
+ # 94 (previous+7+3k)
712
+ [convolutional]
713
+ batch_normalize=1
714
+ filters=640
715
+ size=1
716
+ stride=1
717
+ pad=1
718
+ activation=silu
719
+
720
+ # P5
721
+
722
+ # Downsample
723
+
724
+ [convolutional]
725
+ batch_normalize=1
726
+ filters=1280
727
+ size=3
728
+ stride=2
729
+ pad=1
730
+ activation=silu
731
+
732
+ # Split
733
+
734
+ [convolutional]
735
+ batch_normalize=1
736
+ filters=640
737
+ size=1
738
+ stride=1
739
+ pad=1
740
+ activation=silu
741
+
742
+ [route]
743
+ layers = -2
744
+
745
+ [convolutional]
746
+ batch_normalize=1
747
+ filters=640
748
+ size=1
749
+ stride=1
750
+ pad=1
751
+ activation=silu
752
+
753
+ # Residual Block
754
+
755
+ [convolutional]
756
+ batch_normalize=1
757
+ filters=640
758
+ size=1
759
+ stride=1
760
+ pad=1
761
+ activation=silu
762
+
763
+ [convolutional]
764
+ batch_normalize=1
765
+ filters=640
766
+ size=3
767
+ stride=1
768
+ pad=1
769
+ activation=silu
770
+
771
+ [shortcut]
772
+ from=-3
773
+ activation=linear
774
+
775
+ [convolutional]
776
+ batch_normalize=1
777
+ filters=640
778
+ size=1
779
+ stride=1
780
+ pad=1
781
+ activation=silu
782
+
783
+ [convolutional]
784
+ batch_normalize=1
785
+ filters=640
786
+ size=3
787
+ stride=1
788
+ pad=1
789
+ activation=silu
790
+
791
+ [shortcut]
792
+ from=-3
793
+ activation=linear
794
+
795
+ [convolutional]
796
+ batch_normalize=1
797
+ filters=640
798
+ size=1
799
+ stride=1
800
+ pad=1
801
+ activation=silu
802
+
803
+ [convolutional]
804
+ batch_normalize=1
805
+ filters=640
806
+ size=3
807
+ stride=1
808
+ pad=1
809
+ activation=silu
810
+
811
+ [shortcut]
812
+ from=-3
813
+ activation=linear
814
+
815
+ [convolutional]
816
+ batch_normalize=1
817
+ filters=640
818
+ size=1
819
+ stride=1
820
+ pad=1
821
+ activation=silu
822
+
823
+ [convolutional]
824
+ batch_normalize=1
825
+ filters=640
826
+ size=3
827
+ stride=1
828
+ pad=1
829
+ activation=silu
830
+
831
+ [shortcut]
832
+ from=-3
833
+ activation=linear
834
+
835
+ [convolutional]
836
+ batch_normalize=1
837
+ filters=640
838
+ size=1
839
+ stride=1
840
+ pad=1
841
+ activation=silu
842
+
843
+ [convolutional]
844
+ batch_normalize=1
845
+ filters=640
846
+ size=3
847
+ stride=1
848
+ pad=1
849
+ activation=silu
850
+
851
+ [shortcut]
852
+ from=-3
853
+ activation=linear
854
+
855
+ # Transition first
856
+
857
+ [convolutional]
858
+ batch_normalize=1
859
+ filters=640
860
+ size=1
861
+ stride=1
862
+ pad=1
863
+ activation=silu
864
+
865
+ # Merge [-1 -(3k+4)]
866
+
867
+ [route]
868
+ layers = -1,-19
869
+
870
+ # Transition last
871
+
872
+ # 116 (previous+7+3k)
873
+ [convolutional]
874
+ batch_normalize=1
875
+ filters=1280
876
+ size=1
877
+ stride=1
878
+ pad=1
879
+ activation=silu
880
+
881
+ # ============ End of Backbone ============ #
882
+
883
+ # ============ Neck ============ #
884
+
885
+ # CSPSPP
886
+
887
+ [convolutional]
888
+ batch_normalize=1
889
+ filters=640
890
+ size=1
891
+ stride=1
892
+ pad=1
893
+ activation=silu
894
+
895
+ [route]
896
+ layers = -2
897
+
898
+ [convolutional]
899
+ batch_normalize=1
900
+ filters=640
901
+ size=1
902
+ stride=1
903
+ pad=1
904
+ activation=silu
905
+
906
+ [convolutional]
907
+ batch_normalize=1
908
+ size=3
909
+ stride=1
910
+ pad=1
911
+ filters=640
912
+ activation=silu
913
+
914
+ [convolutional]
915
+ batch_normalize=1
916
+ filters=640
917
+ size=1
918
+ stride=1
919
+ pad=1
920
+ activation=silu
921
+
922
+ ### SPP ###
923
+ [maxpool]
924
+ stride=1
925
+ size=5
926
+
927
+ [route]
928
+ layers=-2
929
+
930
+ [maxpool]
931
+ stride=1
932
+ size=9
933
+
934
+ [route]
935
+ layers=-4
936
+
937
+ [maxpool]
938
+ stride=1
939
+ size=13
940
+
941
+ [route]
942
+ layers=-1,-3,-5,-6
943
+ ### End SPP ###
944
+
945
+ [convolutional]
946
+ batch_normalize=1
947
+ filters=640
948
+ size=1
949
+ stride=1
950
+ pad=1
951
+ activation=silu
952
+
953
+ [convolutional]
954
+ batch_normalize=1
955
+ size=3
956
+ stride=1
957
+ pad=1
958
+ filters=640
959
+ activation=silu
960
+
961
+ [convolutional]
962
+ batch_normalize=1
963
+ filters=640
964
+ size=1
965
+ stride=1
966
+ pad=1
967
+ activation=silu
968
+
969
+ [convolutional]
970
+ batch_normalize=1
971
+ size=3
972
+ stride=1
973
+ pad=1
974
+ filters=640
975
+ activation=silu
976
+
977
+ [route]
978
+ layers = -1, -15
979
+
980
+ # 133 (previous+6+5+2k)
981
+ [convolutional]
982
+ batch_normalize=1
983
+ filters=640
984
+ size=1
985
+ stride=1
986
+ pad=1
987
+ activation=silu
988
+
989
+ # End of CSPSPP
990
+
991
+
992
+ # FPN-4
993
+
994
+ [convolutional]
995
+ batch_normalize=1
996
+ filters=320
997
+ size=1
998
+ stride=1
999
+ pad=1
1000
+ activation=silu
1001
+
1002
+ [upsample]
1003
+ stride=2
1004
+
1005
+ [route]
1006
+ layers = 94
1007
+
1008
+ [convolutional]
1009
+ batch_normalize=1
1010
+ filters=320
1011
+ size=1
1012
+ stride=1
1013
+ pad=1
1014
+ activation=silu
1015
+
1016
+ [route]
1017
+ layers = -1, -3
1018
+
1019
+ [convolutional]
1020
+ batch_normalize=1
1021
+ filters=320
1022
+ size=1
1023
+ stride=1
1024
+ pad=1
1025
+ activation=silu
1026
+
1027
+ # Split
1028
+
1029
+ [convolutional]
1030
+ batch_normalize=1
1031
+ filters=320
1032
+ size=1
1033
+ stride=1
1034
+ pad=1
1035
+ activation=silu
1036
+
1037
+ [route]
1038
+ layers = -2
1039
+
1040
+ # Plain Block
1041
+
1042
+ [convolutional]
1043
+ batch_normalize=1
1044
+ filters=320
1045
+ size=1
1046
+ stride=1
1047
+ pad=1
1048
+ activation=silu
1049
+
1050
+ [convolutional]
1051
+ batch_normalize=1
1052
+ size=3
1053
+ stride=1
1054
+ pad=1
1055
+ filters=320
1056
+ activation=silu
1057
+
1058
+ [convolutional]
1059
+ batch_normalize=1
1060
+ filters=320
1061
+ size=1
1062
+ stride=1
1063
+ pad=1
1064
+ activation=silu
1065
+
1066
+ [convolutional]
1067
+ batch_normalize=1
1068
+ size=3
1069
+ stride=1
1070
+ pad=1
1071
+ filters=320
1072
+ activation=silu
1073
+
1074
+ [convolutional]
1075
+ batch_normalize=1
1076
+ filters=320
1077
+ size=1
1078
+ stride=1
1079
+ pad=1
1080
+ activation=silu
1081
+
1082
+ [convolutional]
1083
+ batch_normalize=1
1084
+ size=3
1085
+ stride=1
1086
+ pad=1
1087
+ filters=320
1088
+ activation=silu
1089
+
1090
+ # Merge [-1, -(2k+2)]
1091
+
1092
+ [route]
1093
+ layers = -1, -8
1094
+
1095
+ # Transition last
1096
+
1097
+ # 149 (previous+6+4+2k)
1098
+ [convolutional]
1099
+ batch_normalize=1
1100
+ filters=320
1101
+ size=1
1102
+ stride=1
1103
+ pad=1
1104
+ activation=silu
1105
+
1106
+
1107
+ # FPN-3
1108
+
1109
+ [convolutional]
1110
+ batch_normalize=1
1111
+ filters=160
1112
+ size=1
1113
+ stride=1
1114
+ pad=1
1115
+ activation=silu
1116
+
1117
+ [upsample]
1118
+ stride=2
1119
+
1120
+ [route]
1121
+ layers = 57
1122
+
1123
+ [convolutional]
1124
+ batch_normalize=1
1125
+ filters=160
1126
+ size=1
1127
+ stride=1
1128
+ pad=1
1129
+ activation=silu
1130
+
1131
+ [route]
1132
+ layers = -1, -3
1133
+
1134
+ [convolutional]
1135
+ batch_normalize=1
1136
+ filters=160
1137
+ size=1
1138
+ stride=1
1139
+ pad=1
1140
+ activation=silu
1141
+
1142
+ # Split
1143
+
1144
+ [convolutional]
1145
+ batch_normalize=1
1146
+ filters=160
1147
+ size=1
1148
+ stride=1
1149
+ pad=1
1150
+ activation=silu
1151
+
1152
+ [route]
1153
+ layers = -2
1154
+
1155
+ # Plain Block
1156
+
1157
+ [convolutional]
1158
+ batch_normalize=1
1159
+ filters=160
1160
+ size=1
1161
+ stride=1
1162
+ pad=1
1163
+ activation=silu
1164
+
1165
+ [convolutional]
1166
+ batch_normalize=1
1167
+ size=3
1168
+ stride=1
1169
+ pad=1
1170
+ filters=160
1171
+ activation=silu
1172
+
1173
+ [convolutional]
1174
+ batch_normalize=1
1175
+ filters=160
1176
+ size=1
1177
+ stride=1
1178
+ pad=1
1179
+ activation=silu
1180
+
1181
+ [convolutional]
1182
+ batch_normalize=1
1183
+ size=3
1184
+ stride=1
1185
+ pad=1
1186
+ filters=160
1187
+ activation=silu
1188
+
1189
+ [convolutional]
1190
+ batch_normalize=1
1191
+ filters=160
1192
+ size=1
1193
+ stride=1
1194
+ pad=1
1195
+ activation=silu
1196
+
1197
+ [convolutional]
1198
+ batch_normalize=1
1199
+ size=3
1200
+ stride=1
1201
+ pad=1
1202
+ filters=160
1203
+ activation=silu
1204
+
1205
+ # Merge [-1, -(2k+2)]
1206
+
1207
+ [route]
1208
+ layers = -1, -8
1209
+
1210
+ # Transition last
1211
+
1212
+ # 165 (previous+6+4+2k)
1213
+ [convolutional]
1214
+ batch_normalize=1
1215
+ filters=160
1216
+ size=1
1217
+ stride=1
1218
+ pad=1
1219
+ activation=silu
1220
+
1221
+
1222
+ # PAN-4
1223
+
1224
+ [convolutional]
1225
+ batch_normalize=1
1226
+ size=3
1227
+ stride=2
1228
+ pad=1
1229
+ filters=320
1230
+ activation=silu
1231
+
1232
+ [route]
1233
+ layers = -1, 149
1234
+
1235
+ [convolutional]
1236
+ batch_normalize=1
1237
+ filters=320
1238
+ size=1
1239
+ stride=1
1240
+ pad=1
1241
+ activation=silu
1242
+
1243
+ # Split
1244
+
1245
+ [convolutional]
1246
+ batch_normalize=1
1247
+ filters=320
1248
+ size=1
1249
+ stride=1
1250
+ pad=1
1251
+ activation=silu
1252
+
1253
+ [route]
1254
+ layers = -2
1255
+
1256
+ # Plain Block
1257
+
1258
+ [convolutional]
1259
+ batch_normalize=1
1260
+ filters=320
1261
+ size=1
1262
+ stride=1
1263
+ pad=1
1264
+ activation=silu
1265
+
1266
+ [convolutional]
1267
+ batch_normalize=1
1268
+ size=3
1269
+ stride=1
1270
+ pad=1
1271
+ filters=320
1272
+ activation=silu
1273
+
1274
+ [convolutional]
1275
+ batch_normalize=1
1276
+ filters=320
1277
+ size=1
1278
+ stride=1
1279
+ pad=1
1280
+ activation=silu
1281
+
1282
+ [convolutional]
1283
+ batch_normalize=1
1284
+ size=3
1285
+ stride=1
1286
+ pad=1
1287
+ filters=320
1288
+ activation=silu
1289
+
1290
+ [convolutional]
1291
+ batch_normalize=1
1292
+ filters=320
1293
+ size=1
1294
+ stride=1
1295
+ pad=1
1296
+ activation=silu
1297
+
1298
+ [convolutional]
1299
+ batch_normalize=1
1300
+ size=3
1301
+ stride=1
1302
+ pad=1
1303
+ filters=320
1304
+ activation=silu
1305
+
1306
+ [route]
1307
+ layers = -1,-8
1308
+
1309
+ # Transition last
1310
+
1311
+ # 178 (previous+3+4+2k)
1312
+ [convolutional]
1313
+ batch_normalize=1
1314
+ filters=320
1315
+ size=1
1316
+ stride=1
1317
+ pad=1
1318
+ activation=silu
1319
+
1320
+
1321
+ # PAN-5
1322
+
1323
+ [convolutional]
1324
+ batch_normalize=1
1325
+ size=3
1326
+ stride=2
1327
+ pad=1
1328
+ filters=640
1329
+ activation=silu
1330
+
1331
+ [route]
1332
+ layers = -1, 133
1333
+
1334
+ [convolutional]
1335
+ batch_normalize=1
1336
+ filters=640
1337
+ size=1
1338
+ stride=1
1339
+ pad=1
1340
+ activation=silu
1341
+
1342
+ # Split
1343
+
1344
+ [convolutional]
1345
+ batch_normalize=1
1346
+ filters=640
1347
+ size=1
1348
+ stride=1
1349
+ pad=1
1350
+ activation=silu
1351
+
1352
+ [route]
1353
+ layers = -2
1354
+
1355
+ # Plain Block
1356
+
1357
+ [convolutional]
1358
+ batch_normalize=1
1359
+ filters=640
1360
+ size=1
1361
+ stride=1
1362
+ pad=1
1363
+ activation=silu
1364
+
1365
+ [convolutional]
1366
+ batch_normalize=1
1367
+ size=3
1368
+ stride=1
1369
+ pad=1
1370
+ filters=640
1371
+ activation=silu
1372
+
1373
+ [convolutional]
1374
+ batch_normalize=1
1375
+ filters=640
1376
+ size=1
1377
+ stride=1
1378
+ pad=1
1379
+ activation=silu
1380
+
1381
+ [convolutional]
1382
+ batch_normalize=1
1383
+ size=3
1384
+ stride=1
1385
+ pad=1
1386
+ filters=640
1387
+ activation=silu
1388
+
1389
+ [convolutional]
1390
+ batch_normalize=1
1391
+ filters=640
1392
+ size=1
1393
+ stride=1
1394
+ pad=1
1395
+ activation=silu
1396
+
1397
+ [convolutional]
1398
+ batch_normalize=1
1399
+ size=3
1400
+ stride=1
1401
+ pad=1
1402
+ filters=640
1403
+ activation=silu
1404
+
1405
+ [route]
1406
+ layers = -1,-8
1407
+
1408
+ # Transition last
1409
+
1410
+ # 191 (previous+3+4+2k)
1411
+ [convolutional]
1412
+ batch_normalize=1
1413
+ filters=640
1414
+ size=1
1415
+ stride=1
1416
+ pad=1
1417
+ activation=silu
1418
+
1419
+ # ============ End of Neck ============ #
1420
+
1421
+ # 192
1422
+ [implicit_add]
1423
+ filters=320
1424
+
1425
+ # 193
1426
+ [implicit_add]
1427
+ filters=640
1428
+
1429
+ # 194
1430
+ [implicit_add]
1431
+ filters=1280
1432
+
1433
+ # 195
1434
+ [implicit_mul]
1435
+ filters=255
1436
+
1437
+ # 196
1438
+ [implicit_mul]
1439
+ filters=255
1440
+
1441
+ # 197
1442
+ [implicit_mul]
1443
+ filters=255
1444
+
1445
+ # ============ Head ============ #
1446
+
1447
+ # YOLO-3
1448
+
1449
+ [route]
1450
+ layers = 165
1451
+
1452
+ [convolutional]
1453
+ batch_normalize=1
1454
+ size=3
1455
+ stride=1
1456
+ pad=1
1457
+ filters=320
1458
+ activation=silu
1459
+
1460
+ [shift_channels]
1461
+ from=192
1462
+
1463
+ [convolutional]
1464
+ size=1
1465
+ stride=1
1466
+ pad=1
1467
+ filters=255
1468
+ activation=linear
1469
+
1470
+ [control_channels]
1471
+ from=195
1472
+
1473
+ [yolo]
1474
+ mask = 0,1,2
1475
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1476
+ classes=80
1477
+ num=9
1478
+ jitter=.3
1479
+ ignore_thresh = .7
1480
+ truth_thresh = 1
1481
+ random=1
1482
+ scale_x_y = 1.05
1483
+ iou_thresh=0.213
1484
+ cls_normalizer=1.0
1485
+ iou_normalizer=0.07
1486
+ iou_loss=ciou
1487
+ nms_kind=greedynms
1488
+ beta_nms=0.6
1489
+
1490
+
1491
+ # YOLO-4
1492
+
1493
+ [route]
1494
+ layers = 178
1495
+
1496
+ [convolutional]
1497
+ batch_normalize=1
1498
+ size=3
1499
+ stride=1
1500
+ pad=1
1501
+ filters=640
1502
+ activation=silu
1503
+
1504
+ [shift_channels]
1505
+ from=193
1506
+
1507
+ [convolutional]
1508
+ size=1
1509
+ stride=1
1510
+ pad=1
1511
+ filters=255
1512
+ activation=linear
1513
+
1514
+ [control_channels]
1515
+ from=196
1516
+
1517
+ [yolo]
1518
+ mask = 3,4,5
1519
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1520
+ classes=80
1521
+ num=9
1522
+ jitter=.3
1523
+ ignore_thresh = .7
1524
+ truth_thresh = 1
1525
+ random=1
1526
+ scale_x_y = 1.05
1527
+ iou_thresh=0.213
1528
+ cls_normalizer=1.0
1529
+ iou_normalizer=0.07
1530
+ iou_loss=ciou
1531
+ nms_kind=greedynms
1532
+ beta_nms=0.6
1533
+
1534
+
1535
+ # YOLO-5
1536
+
1537
+ [route]
1538
+ layers = 191
1539
+
1540
+ [convolutional]
1541
+ batch_normalize=1
1542
+ size=3
1543
+ stride=1
1544
+ pad=1
1545
+ filters=1280
1546
+ activation=silu
1547
+
1548
+ [shift_channels]
1549
+ from=194
1550
+
1551
+ [convolutional]
1552
+ size=1
1553
+ stride=1
1554
+ pad=1
1555
+ filters=255
1556
+ activation=linear
1557
+
1558
+ [control_channels]
1559
+ from=197
1560
+
1561
+ [yolo]
1562
+ mask = 6,7,8
1563
+ anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
1564
+ classes=80
1565
+ num=9
1566
+ jitter=.3
1567
+ ignore_thresh = .7
1568
+ truth_thresh = 1
1569
+ random=1
1570
+ scale_x_y = 1.05
1571
+ iou_thresh=0.213
1572
+ cls_normalizer=1.0
1573
+ iou_normalizer=0.07
1574
+ iou_loss=ciou
1575
+ nms_kind=greedynms
1576
+ beta_nms=0.6
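
In the `[yolo]` blocks of both cfg files above, the flat `anchors` list holds nine (width, height) pairs and each head's `mask` selects three of them. A small illustrative snippet (values copied from the config, variable names hypothetical):

```python
# How the [yolo] blocks pair `anchors` with `mask`: nine (w, h) anchor boxes,
# three per detection head, with the smallest anchors on the highest-resolution head.
anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
pairs = list(zip(anchors[0::2], anchors[1::2]))      # [(12, 16), (19, 36), ..., (459, 401)]

masks = {"YOLO-3": [0, 1, 2], "YOLO-4": [3, 4, 5], "YOLO-5": [6, 7, 8]}
for head, mask in masks.items():
    print(head, [pairs[i] for i in mask])
```
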
asone/detectors/yolor/cfg/yolor_p6.cfg ADDED
@@ -0,0 +1,1760 @@
1
+ [net]
2
+ batch=64
3
+ subdivisions=8
4
+ width=1280
5
+ height=1280
6
+ channels=3
7
+ momentum=0.949
8
+ decay=0.0005
9
+ angle=0
10
+ saturation = 1.5
11
+ exposure = 1.5
12
+ hue=.1
13
+
14
+ learning_rate=0.00261
15
+ burn_in=1000
16
+ max_batches = 500500
17
+ policy=steps
18
+ steps=400000,450000
19
+ scales=.1,.1
20
+
21
+ mosaic=1
22
+
23
+
24
+ # ============ Backbone ============ #
25
+
26
+ # Stem
27
+
28
+ # P1
29
+
30
+ # Downsample
31
+
32
+ # 0
33
+ [reorg]
34
+
35
+ [convolutional]
36
+ batch_normalize=1
37
+ filters=64
38
+ size=3
39
+ stride=1
40
+ pad=1
41
+ activation=silu
42
+
43
+
44
+ # P2
45
+
46
+ # Downsample
47
+
48
+ [convolutional]
49
+ batch_normalize=1
50
+ filters=128
51
+ size=3
52
+ stride=2
53
+ pad=1
54
+ activation=silu
55
+
56
+ # Split
57
+
58
+ [convolutional]
59
+ batch_normalize=1
60
+ filters=64
61
+ size=1
62
+ stride=1
63
+ pad=1
64
+ activation=silu
65
+
66
+ [route]
67
+ layers = -2
68
+
69
+ [convolutional]
70
+ batch_normalize=1
71
+ filters=64
72
+ size=1
73
+ stride=1
74
+ pad=1
75
+ activation=silu
76
+
77
+ # Residual Block
78
+
79
+ [convolutional]
80
+ batch_normalize=1
81
+ filters=64
82
+ size=1
83
+ stride=1
84
+ pad=1
85
+ activation=silu
86
+
87
+ [convolutional]
88
+ batch_normalize=1
89
+ filters=64
90
+ size=3
91
+ stride=1
92
+ pad=1
93
+ activation=silu
94
+
95
+ [shortcut]
96
+ from=-3
97
+ activation=linear
98
+
99
+ [convolutional]
100
+ batch_normalize=1
101
+ filters=64
102
+ size=1
103
+ stride=1
104
+ pad=1
105
+ activation=silu
106
+
107
+ [convolutional]
108
+ batch_normalize=1
109
+ filters=64
110
+ size=3
111
+ stride=1
112
+ pad=1
113
+ activation=silu
114
+
115
+ [shortcut]
116
+ from=-3
117
+ activation=linear
118
+
119
+ [convolutional]
120
+ batch_normalize=1
121
+ filters=64
122
+ size=1
123
+ stride=1
124
+ pad=1
125
+ activation=silu
126
+
127
+ [convolutional]
128
+ batch_normalize=1
129
+ filters=64
130
+ size=3
131
+ stride=1
132
+ pad=1
133
+ activation=silu
134
+
135
+ [shortcut]
136
+ from=-3
137
+ activation=linear
138
+
139
+ # Transition first
140
+ #
141
+ #[convolutional]
142
+ #batch_normalize=1
143
+ #filters=64
144
+ #size=1
145
+ #stride=1
146
+ #pad=1
147
+ #activation=silu
148
+
149
+ # Merge [-1, -(3k+3)]
150
+
151
+ [route]
152
+ layers = -1,-12
153
+
154
+ # Transition last
155
+
156
+ # 16 (previous+6+3k)
157
+ [convolutional]
158
+ batch_normalize=1
159
+ filters=128
160
+ size=1
161
+ stride=1
162
+ pad=1
163
+ activation=silu
164
+
165
+
166
+ # P3
167
+
168
+ # Downsample
169
+
170
+ [convolutional]
171
+ batch_normalize=1
172
+ filters=256
173
+ size=3
174
+ stride=2
175
+ pad=1
176
+ activation=silu
177
+
178
+ # Split
179
+
180
+ [convolutional]
181
+ batch_normalize=1
182
+ filters=128
183
+ size=1
184
+ stride=1
185
+ pad=1
186
+ activation=silu
187
+
188
+ [route]
189
+ layers = -2
190
+
191
+ [convolutional]
192
+ batch_normalize=1
193
+ filters=128
194
+ size=1
195
+ stride=1
196
+ pad=1
197
+ activation=silu
198
+
199
+ # Residual Block
200
+
201
+ [convolutional]
202
+ batch_normalize=1
203
+ filters=128
204
+ size=1
205
+ stride=1
206
+ pad=1
207
+ activation=silu
208
+
209
+ [convolutional]
210
+ batch_normalize=1
211
+ filters=128
212
+ size=3
213
+ stride=1
214
+ pad=1
215
+ activation=silu
216
+
217
+ [shortcut]
218
+ from=-3
219
+ activation=linear
220
+
221
+ [convolutional]
222
+ batch_normalize=1
223
+ filters=128
224
+ size=1
225
+ stride=1
226
+ pad=1
227
+ activation=silu
228
+
229
+ [convolutional]
230
+ batch_normalize=1
231
+ filters=128
232
+ size=3
233
+ stride=1
234
+ pad=1
235
+ activation=silu
236
+
237
+ [shortcut]
238
+ from=-3
239
+ activation=linear
240
+
241
+ [convolutional]
242
+ batch_normalize=1
243
+ filters=128
244
+ size=1
245
+ stride=1
246
+ pad=1
247
+ activation=silu
248
+
249
+ [convolutional]
250
+ batch_normalize=1
251
+ filters=128
252
+ size=3
253
+ stride=1
254
+ pad=1
255
+ activation=silu
256
+
257
+ [shortcut]
258
+ from=-3
259
+ activation=linear
260
+
261
+ [convolutional]
262
+ batch_normalize=1
263
+ filters=128
264
+ size=1
265
+ stride=1
266
+ pad=1
267
+ activation=silu
268
+
269
+ [convolutional]
270
+ batch_normalize=1
271
+ filters=128
272
+ size=3
273
+ stride=1
274
+ pad=1
275
+ activation=silu
276
+
277
+ [shortcut]
278
+ from=-3
279
+ activation=linear
280
+
281
+ [convolutional]
282
+ batch_normalize=1
283
+ filters=128
284
+ size=1
285
+ stride=1
286
+ pad=1
287
+ activation=silu
288
+
289
+ [convolutional]
290
+ batch_normalize=1
291
+ filters=128
292
+ size=3
293
+ stride=1
294
+ pad=1
295
+ activation=silu
296
+
297
+ [shortcut]
298
+ from=-3
299
+ activation=linear
300
+
301
+ [convolutional]
302
+ batch_normalize=1
303
+ filters=128
304
+ size=1
305
+ stride=1
306
+ pad=1
307
+ activation=silu
308
+
309
+ [convolutional]
310
+ batch_normalize=1
311
+ filters=128
312
+ size=3
313
+ stride=1
314
+ pad=1
315
+ activation=silu
316
+
317
+ [shortcut]
318
+ from=-3
319
+ activation=linear
320
+
321
+ [convolutional]
322
+ batch_normalize=1
323
+ filters=128
324
+ size=1
325
+ stride=1
326
+ pad=1
327
+ activation=silu
328
+
329
+ [convolutional]
330
+ batch_normalize=1
331
+ filters=128
332
+ size=3
333
+ stride=1
334
+ pad=1
335
+ activation=silu
336
+
337
+ [shortcut]
338
+ from=-3
339
+ activation=linear
340
+
341
+ # Transition first
342
+ #
343
+ #[convolutional]
344
+ #batch_normalize=1
345
+ #filters=128
346
+ #size=1
347
+ #stride=1
348
+ #pad=1
349
+ #activation=silu
350
+
351
+ # Merge [-1, -(3k+3)]
352
+
353
+ [route]
354
+ layers = -1,-24
355
+
356
+ # Transition last
357
+
358
+ # 43 (previous+6+3k)
359
+ [convolutional]
360
+ batch_normalize=1
361
+ filters=256
362
+ size=1
363
+ stride=1
364
+ pad=1
365
+ activation=silu
366
+
367
+
368
+ # P4
369
+
370
+ # Downsample
371
+
372
+ [convolutional]
373
+ batch_normalize=1
374
+ filters=384
375
+ size=3
376
+ stride=2
377
+ pad=1
378
+ activation=silu
379
+
380
+ # Split
381
+
382
+ [convolutional]
383
+ batch_normalize=1
384
+ filters=192
385
+ size=1
386
+ stride=1
387
+ pad=1
388
+ activation=silu
389
+
390
+ [route]
391
+ layers = -2
392
+
393
+ [convolutional]
394
+ batch_normalize=1
395
+ filters=192
396
+ size=1
397
+ stride=1
398
+ pad=1
399
+ activation=silu
400
+
401
+ # Residual Block
402
+
403
+ [convolutional]
404
+ batch_normalize=1
405
+ filters=192
406
+ size=1
407
+ stride=1
408
+ pad=1
409
+ activation=silu
410
+
411
+ [convolutional]
412
+ batch_normalize=1
413
+ filters=192
414
+ size=3
415
+ stride=1
416
+ pad=1
417
+ activation=silu
418
+
419
+ [shortcut]
420
+ from=-3
421
+ activation=linear
422
+
423
+ [convolutional]
424
+ batch_normalize=1
425
+ filters=192
426
+ size=1
427
+ stride=1
428
+ pad=1
429
+ activation=silu
430
+
431
+ [convolutional]
432
+ batch_normalize=1
433
+ filters=192
434
+ size=3
435
+ stride=1
436
+ pad=1
437
+ activation=silu
438
+
439
+ [shortcut]
440
+ from=-3
441
+ activation=linear
442
+
443
+ [convolutional]
444
+ batch_normalize=1
445
+ filters=192
446
+ size=1
447
+ stride=1
448
+ pad=1
449
+ activation=silu
450
+
451
+ [convolutional]
452
+ batch_normalize=1
453
+ filters=192
454
+ size=3
455
+ stride=1
456
+ pad=1
457
+ activation=silu
458
+
459
+ [shortcut]
460
+ from=-3
461
+ activation=linear
462
+
463
+ [convolutional]
464
+ batch_normalize=1
465
+ filters=192
466
+ size=1
467
+ stride=1
468
+ pad=1
469
+ activation=silu
470
+
471
+ [convolutional]
472
+ batch_normalize=1
473
+ filters=192
474
+ size=3
475
+ stride=1
476
+ pad=1
477
+ activation=silu
478
+
479
+ [shortcut]
480
+ from=-3
481
+ activation=linear
482
+
483
+ [convolutional]
484
+ batch_normalize=1
485
+ filters=192
486
+ size=1
487
+ stride=1
488
+ pad=1
489
+ activation=silu
490
+
491
+ [convolutional]
492
+ batch_normalize=1
493
+ filters=192
494
+ size=3
495
+ stride=1
496
+ pad=1
497
+ activation=silu
498
+
499
+ [shortcut]
500
+ from=-3
501
+ activation=linear
502
+
503
+ [convolutional]
504
+ batch_normalize=1
505
+ filters=192
506
+ size=1
507
+ stride=1
508
+ pad=1
509
+ activation=silu
510
+
511
+ [convolutional]
512
+ batch_normalize=1
513
+ filters=192
514
+ size=3
515
+ stride=1
516
+ pad=1
517
+ activation=silu
518
+
519
+ [shortcut]
520
+ from=-3
521
+ activation=linear
522
+
523
+ [convolutional]
524
+ batch_normalize=1
525
+ filters=192
526
+ size=1
527
+ stride=1
528
+ pad=1
529
+ activation=silu
530
+
531
+ [convolutional]
532
+ batch_normalize=1
533
+ filters=192
534
+ size=3
535
+ stride=1
536
+ pad=1
537
+ activation=silu
538
+
539
+ [shortcut]
540
+ from=-3
541
+ activation=linear
542
+
543
+ # Transition first
544
+ #
545
+ #[convolutional]
546
+ #batch_normalize=1
547
+ #filters=192
548
+ #size=1
549
+ #stride=1
550
+ #pad=1
551
+ #activation=silu
552
+
553
+ # Merge [-1, -(3k+3)]
554
+
555
+ [route]
556
+ layers = -1,-24
557
+
558
+ # Transition last
559
+
560
+ # 70 (previous+6+3k)
561
+ [convolutional]
562
+ batch_normalize=1
563
+ filters=384
564
+ size=1
565
+ stride=1
566
+ pad=1
567
+ activation=silu
568
+
569
+
570
+ # P5
571
+
572
+ # Downsample
573
+
574
+ [convolutional]
575
+ batch_normalize=1
576
+ filters=512
577
+ size=3
578
+ stride=2
579
+ pad=1
580
+ activation=silu
581
+
582
+ # Split
583
+
584
+ [convolutional]
585
+ batch_normalize=1
586
+ filters=256
587
+ size=1
588
+ stride=1
589
+ pad=1
590
+ activation=silu
591
+
592
+ [route]
593
+ layers = -2
594
+
595
+ [convolutional]
596
+ batch_normalize=1
597
+ filters=256
598
+ size=1
599
+ stride=1
600
+ pad=1
601
+ activation=silu
602
+
603
+ # Residual Block
604
+
605
+ [convolutional]
606
+ batch_normalize=1
607
+ filters=256
608
+ size=1
609
+ stride=1
610
+ pad=1
611
+ activation=silu
612
+
613
+ [convolutional]
614
+ batch_normalize=1
615
+ filters=256
616
+ size=3
617
+ stride=1
618
+ pad=1
619
+ activation=silu
620
+
621
+ [shortcut]
622
+ from=-3
623
+ activation=linear
624
+
625
+ [convolutional]
626
+ batch_normalize=1
627
+ filters=256
628
+ size=1
629
+ stride=1
630
+ pad=1
631
+ activation=silu
632
+
633
+ [convolutional]
634
+ batch_normalize=1
635
+ filters=256
636
+ size=3
637
+ stride=1
638
+ pad=1
639
+ activation=silu
640
+
641
+ [shortcut]
642
+ from=-3
643
+ activation=linear
644
+
645
+ [convolutional]
646
+ batch_normalize=1
647
+ filters=256
648
+ size=1
649
+ stride=1
650
+ pad=1
651
+ activation=silu
652
+
653
+ [convolutional]
654
+ batch_normalize=1
655
+ filters=256
656
+ size=3
657
+ stride=1
658
+ pad=1
659
+ activation=silu
660
+
661
+ [shortcut]
662
+ from=-3
663
+ activation=linear
664
+
665
+ # Transition first
666
+ #
667
+ #[convolutional]
668
+ #batch_normalize=1
669
+ #filters=256
670
+ #size=1
671
+ #stride=1
672
+ #pad=1
673
+ #activation=silu
674
+
675
+ # Merge [-1, -(3k+3)]
676
+
677
+ [route]
678
+ layers = -1,-12
679
+
680
+ # Transition last
681
+
682
+ # 85 (previous+6+3k)
683
+ [convolutional]
684
+ batch_normalize=1
685
+ filters=512
686
+ size=1
687
+ stride=1
688
+ pad=1
689
+ activation=silu
690
+
691
+
692
+ # P6
693
+
694
+ # Downsample
695
+
696
+ [convolutional]
697
+ batch_normalize=1
698
+ filters=640
699
+ size=3
700
+ stride=2
701
+ pad=1
702
+ activation=silu
703
+
704
+ # Split
705
+
706
+ [convolutional]
707
+ batch_normalize=1
708
+ filters=320
709
+ size=1
710
+ stride=1
711
+ pad=1
712
+ activation=silu
713
+
714
+ [route]
715
+ layers = -2
716
+
717
+ [convolutional]
718
+ batch_normalize=1
719
+ filters=320
720
+ size=1
721
+ stride=1
722
+ pad=1
723
+ activation=silu
724
+
725
+ # Residual Block
726
+
727
+ [convolutional]
728
+ batch_normalize=1
729
+ filters=320
730
+ size=1
731
+ stride=1
732
+ pad=1
733
+ activation=silu
734
+
735
+ [convolutional]
736
+ batch_normalize=1
737
+ filters=320
738
+ size=3
739
+ stride=1
740
+ pad=1
741
+ activation=silu
742
+
743
+ [shortcut]
744
+ from=-3
745
+ activation=linear
746
+
747
+ [convolutional]
748
+ batch_normalize=1
749
+ filters=320
750
+ size=1
751
+ stride=1
752
+ pad=1
753
+ activation=silu
754
+
755
+ [convolutional]
756
+ batch_normalize=1
757
+ filters=320
758
+ size=3
759
+ stride=1
760
+ pad=1
761
+ activation=silu
762
+
763
+ [shortcut]
764
+ from=-3
765
+ activation=linear
766
+
767
+ [convolutional]
768
+ batch_normalize=1
769
+ filters=320
770
+ size=1
771
+ stride=1
772
+ pad=1
773
+ activation=silu
774
+
775
+ [convolutional]
776
+ batch_normalize=1
777
+ filters=320
778
+ size=3
779
+ stride=1
780
+ pad=1
781
+ activation=silu
782
+
783
+ [shortcut]
784
+ from=-3
785
+ activation=linear
786
+
787
+ # Transition first
788
+ #
789
+ #[convolutional]
790
+ #batch_normalize=1
791
+ #filters=320
792
+ #size=1
793
+ #stride=1
794
+ #pad=1
795
+ #activation=silu
796
+
797
+ # Merge [-1, -(3k+3)]
798
+
799
+ [route]
800
+ layers = -1,-12
801
+
802
+ # Transition last
803
+
804
+ # 100 (previous+6+3k)
805
+ [convolutional]
806
+ batch_normalize=1
807
+ filters=640
808
+ size=1
809
+ stride=1
810
+ pad=1
811
+ activation=silu
812
+
813
+ # ============ End of Backbone ============ #
814
+
815
+ # ============ Neck ============ #
816
+
817
+ # CSPSPP
818
+
819
+ [convolutional]
820
+ batch_normalize=1
821
+ filters=320
822
+ size=1
823
+ stride=1
824
+ pad=1
825
+ activation=silu
826
+
827
+ [route]
828
+ layers = -2
829
+
830
+ [convolutional]
831
+ batch_normalize=1
832
+ filters=320
833
+ size=1
834
+ stride=1
835
+ pad=1
836
+ activation=silu
837
+
838
+ [convolutional]
839
+ batch_normalize=1
840
+ size=3
841
+ stride=1
842
+ pad=1
843
+ filters=320
844
+ activation=silu
845
+
846
+ [convolutional]
847
+ batch_normalize=1
848
+ filters=320
849
+ size=1
850
+ stride=1
851
+ pad=1
852
+ activation=silu
853
+
854
+ ### SPP ###
855
+ [maxpool]
856
+ stride=1
857
+ size=5
858
+
859
+ [route]
860
+ layers=-2
861
+
862
+ [maxpool]
863
+ stride=1
864
+ size=9
865
+
866
+ [route]
867
+ layers=-4
868
+
869
+ [maxpool]
870
+ stride=1
871
+ size=13
872
+
873
+ [route]
874
+ layers=-1,-3,-5,-6
875
+ ### End SPP ###
876
+
877
+ [convolutional]
878
+ batch_normalize=1
879
+ filters=320
880
+ size=1
881
+ stride=1
882
+ pad=1
883
+ activation=silu
884
+
885
+ [convolutional]
886
+ batch_normalize=1
887
+ size=3
888
+ stride=1
889
+ pad=1
890
+ filters=320
891
+ activation=silu
892
+
893
+ [route]
894
+ layers = -1, -13
895
+
896
+ # 115 (previous+6+5+2k)
897
+ [convolutional]
898
+ batch_normalize=1
899
+ filters=320
900
+ size=1
901
+ stride=1
902
+ pad=1
903
+ activation=silu
904
+
905
+ # End of CSPSPP
906
+
907
+
908
+ # FPN-5
909
+
910
+ [convolutional]
911
+ batch_normalize=1
912
+ filters=256
913
+ size=1
914
+ stride=1
915
+ pad=1
916
+ activation=silu
917
+
918
+ [upsample]
919
+ stride=2
920
+
921
+ [route]
922
+ layers = 85
923
+
924
+ [convolutional]
925
+ batch_normalize=1
926
+ filters=256
927
+ size=1
928
+ stride=1
929
+ pad=1
930
+ activation=silu
931
+
932
+ [route]
933
+ layers = -1, -3
934
+
935
+ [convolutional]
936
+ batch_normalize=1
937
+ filters=256
938
+ size=1
939
+ stride=1
940
+ pad=1
941
+ activation=silu
942
+
943
+ # Split
944
+
945
+ [convolutional]
946
+ batch_normalize=1
947
+ filters=256
948
+ size=1
949
+ stride=1
950
+ pad=1
951
+ activation=silu
952
+
953
+ [route]
954
+ layers = -2
955
+
956
+ # Plain Block
957
+
958
+ [convolutional]
959
+ batch_normalize=1
960
+ filters=256
961
+ size=1
962
+ stride=1
963
+ pad=1
964
+ activation=silu
965
+
966
+ [convolutional]
967
+ batch_normalize=1
968
+ size=3
969
+ stride=1
970
+ pad=1
971
+ filters=256
972
+ activation=silu
973
+
974
+ [convolutional]
975
+ batch_normalize=1
976
+ filters=256
977
+ size=1
978
+ stride=1
979
+ pad=1
980
+ activation=silu
981
+
982
+ [convolutional]
983
+ batch_normalize=1
984
+ size=3
985
+ stride=1
986
+ pad=1
987
+ filters=256
988
+ activation=silu
989
+
990
+ [convolutional]
991
+ batch_normalize=1
992
+ filters=256
993
+ size=1
994
+ stride=1
995
+ pad=1
996
+ activation=silu
997
+
998
+ [convolutional]
999
+ batch_normalize=1
1000
+ size=3
1001
+ stride=1
1002
+ pad=1
1003
+ filters=256
1004
+ activation=silu
1005
+
1006
+ # Merge [-1, -(2k+2)]
1007
+
1008
+ [route]
1009
+ layers = -1, -8
1010
+
1011
+ # Transition last
1012
+
1013
+ # 131 (previous+6+4+2k)
1014
+ [convolutional]
1015
+ batch_normalize=1
1016
+ filters=256
1017
+ size=1
1018
+ stride=1
1019
+ pad=1
1020
+ activation=silu
1021
+
1022
+
1023
+ # FPN-4
1024
+
1025
+ [convolutional]
1026
+ batch_normalize=1
1027
+ filters=192
1028
+ size=1
1029
+ stride=1
1030
+ pad=1
1031
+ activation=silu
1032
+
1033
+ [upsample]
1034
+ stride=2
1035
+
1036
+ [route]
1037
+ layers = 70
1038
+
1039
+ [convolutional]
1040
+ batch_normalize=1
1041
+ filters=192
1042
+ size=1
1043
+ stride=1
1044
+ pad=1
1045
+ activation=silu
1046
+
1047
+ [route]
1048
+ layers = -1, -3
1049
+
1050
+ [convolutional]
1051
+ batch_normalize=1
1052
+ filters=192
1053
+ size=1
1054
+ stride=1
1055
+ pad=1
1056
+ activation=silu
1057
+
1058
+ # Split
1059
+
1060
+ [convolutional]
1061
+ batch_normalize=1
1062
+ filters=192
1063
+ size=1
1064
+ stride=1
1065
+ pad=1
1066
+ activation=silu
1067
+
1068
+ [route]
1069
+ layers = -2
1070
+
1071
+ # Plain Block
1072
+
1073
+ [convolutional]
1074
+ batch_normalize=1
1075
+ filters=192
1076
+ size=1
1077
+ stride=1
1078
+ pad=1
1079
+ activation=silu
1080
+
1081
+ [convolutional]
1082
+ batch_normalize=1
1083
+ size=3
1084
+ stride=1
1085
+ pad=1
1086
+ filters=192
1087
+ activation=silu
1088
+
1089
+ [convolutional]
1090
+ batch_normalize=1
1091
+ filters=192
1092
+ size=1
1093
+ stride=1
1094
+ pad=1
1095
+ activation=silu
1096
+
1097
+ [convolutional]
1098
+ batch_normalize=1
1099
+ size=3
1100
+ stride=1
1101
+ pad=1
1102
+ filters=192
1103
+ activation=silu
1104
+
1105
+ [convolutional]
1106
+ batch_normalize=1
1107
+ filters=192
1108
+ size=1
1109
+ stride=1
1110
+ pad=1
1111
+ activation=silu
1112
+
1113
+ [convolutional]
1114
+ batch_normalize=1
1115
+ size=3
1116
+ stride=1
1117
+ pad=1
1118
+ filters=192
1119
+ activation=silu
1120
+
1121
+ # Merge [-1, -(2k+2)]
1122
+
1123
+ [route]
1124
+ layers = -1, -8
1125
+
1126
+ # Transition last
1127
+
1128
+ # 147 (previous+6+4+2k)
1129
+ [convolutional]
1130
+ batch_normalize=1
1131
+ filters=192
1132
+ size=1
1133
+ stride=1
1134
+ pad=1
1135
+ activation=silu
1136
+
1137
+
1138
+ # FPN-3
1139
+
1140
+ [convolutional]
1141
+ batch_normalize=1
1142
+ filters=128
1143
+ size=1
1144
+ stride=1
1145
+ pad=1
1146
+ activation=silu
1147
+
1148
+ [upsample]
1149
+ stride=2
1150
+
1151
+ [route]
1152
+ layers = 43
1153
+
1154
+ [convolutional]
1155
+ batch_normalize=1
1156
+ filters=128
1157
+ size=1
1158
+ stride=1
1159
+ pad=1
1160
+ activation=silu
1161
+
1162
+ [route]
1163
+ layers = -1, -3
1164
+
1165
+ [convolutional]
1166
+ batch_normalize=1
1167
+ filters=128
1168
+ size=1
1169
+ stride=1
1170
+ pad=1
1171
+ activation=silu
1172
+
1173
+ # Split
1174
+
1175
+ [convolutional]
1176
+ batch_normalize=1
1177
+ filters=128
1178
+ size=1
1179
+ stride=1
1180
+ pad=1
1181
+ activation=silu
1182
+
1183
+ [route]
1184
+ layers = -2
1185
+
1186
+ # Plain Block
1187
+
1188
+ [convolutional]
1189
+ batch_normalize=1
1190
+ filters=128
1191
+ size=1
1192
+ stride=1
1193
+ pad=1
1194
+ activation=silu
1195
+
1196
+ [convolutional]
1197
+ batch_normalize=1
1198
+ size=3
1199
+ stride=1
1200
+ pad=1
1201
+ filters=128
1202
+ activation=silu
1203
+
1204
+ [convolutional]
1205
+ batch_normalize=1
1206
+ filters=128
1207
+ size=1
1208
+ stride=1
1209
+ pad=1
1210
+ activation=silu
1211
+
1212
+ [convolutional]
1213
+ batch_normalize=1
1214
+ size=3
1215
+ stride=1
1216
+ pad=1
1217
+ filters=128
1218
+ activation=silu
1219
+
1220
+ [convolutional]
1221
+ batch_normalize=1
1222
+ filters=128
1223
+ size=1
1224
+ stride=1
1225
+ pad=1
1226
+ activation=silu
1227
+
1228
+ [convolutional]
1229
+ batch_normalize=1
1230
+ size=3
1231
+ stride=1
1232
+ pad=1
1233
+ filters=128
1234
+ activation=silu
1235
+
1236
+ # Merge [-1, -(2k+2)]
1237
+
1238
+ [route]
1239
+ layers = -1, -8
1240
+
1241
+ # Transition last
1242
+
1243
+ # 163 (previous+6+4+2k)
1244
+ [convolutional]
1245
+ batch_normalize=1
1246
+ filters=128
1247
+ size=1
1248
+ stride=1
1249
+ pad=1
1250
+ activation=silu
1251
+
1252
+
1253
+ # PAN-4
1254
+
1255
+ [convolutional]
1256
+ batch_normalize=1
1257
+ size=3
1258
+ stride=2
1259
+ pad=1
1260
+ filters=192
1261
+ activation=silu
1262
+
1263
+ [route]
1264
+ layers = -1, 147
1265
+
1266
+ [convolutional]
1267
+ batch_normalize=1
1268
+ filters=192
1269
+ size=1
1270
+ stride=1
1271
+ pad=1
1272
+ activation=silu
1273
+
1274
+ # Split
1275
+
1276
+ [convolutional]
1277
+ batch_normalize=1
1278
+ filters=192
1279
+ size=1
1280
+ stride=1
1281
+ pad=1
1282
+ activation=silu
1283
+
1284
+ [route]
1285
+ layers = -2
1286
+
1287
+ # Plain Block
1288
+
1289
+ [convolutional]
1290
+ batch_normalize=1
1291
+ filters=192
1292
+ size=1
1293
+ stride=1
1294
+ pad=1
1295
+ activation=silu
1296
+
1297
+ [convolutional]
1298
+ batch_normalize=1
1299
+ size=3
1300
+ stride=1
1301
+ pad=1
1302
+ filters=192
1303
+ activation=silu
1304
+
1305
+ [convolutional]
1306
+ batch_normalize=1
1307
+ filters=192
1308
+ size=1
1309
+ stride=1
1310
+ pad=1
1311
+ activation=silu
1312
+
1313
+ [convolutional]
1314
+ batch_normalize=1
1315
+ size=3
1316
+ stride=1
1317
+ pad=1
1318
+ filters=192
1319
+ activation=silu
1320
+
1321
+ [convolutional]
1322
+ batch_normalize=1
1323
+ filters=192
1324
+ size=1
1325
+ stride=1
1326
+ pad=1
1327
+ activation=silu
1328
+
1329
+ [convolutional]
1330
+ batch_normalize=1
1331
+ size=3
1332
+ stride=1
1333
+ pad=1
1334
+ filters=192
1335
+ activation=silu
1336
+
1337
+ [route]
1338
+ layers = -1,-8
1339
+
1340
+ # Transition last
1341
+
1342
+ # 176 (previous+3+4+2k)
1343
+ [convolutional]
1344
+ batch_normalize=1
1345
+ filters=192
1346
+ size=1
1347
+ stride=1
1348
+ pad=1
1349
+ activation=silu
1350
+
1351
+
1352
+ # PAN-5
1353
+
1354
+ [convolutional]
1355
+ batch_normalize=1
1356
+ size=3
1357
+ stride=2
1358
+ pad=1
1359
+ filters=256
1360
+ activation=silu
1361
+
1362
+ [route]
1363
+ layers = -1, 131
1364
+
1365
+ [convolutional]
1366
+ batch_normalize=1
1367
+ filters=256
1368
+ size=1
1369
+ stride=1
1370
+ pad=1
1371
+ activation=silu
1372
+
1373
+ # Split
1374
+
1375
+ [convolutional]
1376
+ batch_normalize=1
1377
+ filters=256
1378
+ size=1
1379
+ stride=1
1380
+ pad=1
1381
+ activation=silu
1382
+
1383
+ [route]
1384
+ layers = -2
1385
+
1386
+ # Plain Block
1387
+
1388
+ [convolutional]
1389
+ batch_normalize=1
1390
+ filters=256
1391
+ size=1
1392
+ stride=1
1393
+ pad=1
1394
+ activation=silu
1395
+
1396
+ [convolutional]
1397
+ batch_normalize=1
1398
+ size=3
1399
+ stride=1
1400
+ pad=1
1401
+ filters=256
1402
+ activation=silu
1403
+
1404
+ [convolutional]
1405
+ batch_normalize=1
1406
+ filters=256
1407
+ size=1
1408
+ stride=1
1409
+ pad=1
1410
+ activation=silu
1411
+
1412
+ [convolutional]
1413
+ batch_normalize=1
1414
+ size=3
1415
+ stride=1
1416
+ pad=1
1417
+ filters=256
1418
+ activation=silu
1419
+
1420
+ [convolutional]
1421
+ batch_normalize=1
1422
+ filters=256
1423
+ size=1
1424
+ stride=1
1425
+ pad=1
1426
+ activation=silu
1427
+
1428
+ [convolutional]
1429
+ batch_normalize=1
1430
+ size=3
1431
+ stride=1
1432
+ pad=1
1433
+ filters=256
1434
+ activation=silu
1435
+
1436
+ [route]
1437
+ layers = -1,-8
1438
+
1439
+ # Transition last
1440
+
1441
+ # 189 (previous+3+4+2k)
1442
+ [convolutional]
1443
+ batch_normalize=1
1444
+ filters=256
1445
+ size=1
1446
+ stride=1
1447
+ pad=1
1448
+ activation=silu
1449
+
1450
+
1451
+ # PAN-6
1452
+
1453
+ [convolutional]
1454
+ batch_normalize=1
1455
+ size=3
1456
+ stride=2
1457
+ pad=1
1458
+ filters=320
1459
+ activation=silu
1460
+
1461
+ [route]
1462
+ layers = -1, 115
1463
+
1464
+ [convolutional]
1465
+ batch_normalize=1
1466
+ filters=320
1467
+ size=1
1468
+ stride=1
1469
+ pad=1
1470
+ activation=silu
1471
+
1472
+ # Split
1473
+
1474
+ [convolutional]
1475
+ batch_normalize=1
1476
+ filters=320
1477
+ size=1
1478
+ stride=1
1479
+ pad=1
1480
+ activation=silu
1481
+
1482
+ [route]
1483
+ layers = -2
1484
+
1485
+ # Plain Block
1486
+
1487
+ [convolutional]
1488
+ batch_normalize=1
1489
+ filters=320
1490
+ size=1
1491
+ stride=1
1492
+ pad=1
1493
+ activation=silu
1494
+
1495
+ [convolutional]
1496
+ batch_normalize=1
1497
+ size=3
1498
+ stride=1
1499
+ pad=1
1500
+ filters=320
1501
+ activation=silu
1502
+
1503
+ [convolutional]
1504
+ batch_normalize=1
1505
+ filters=320
1506
+ size=1
1507
+ stride=1
1508
+ pad=1
1509
+ activation=silu
1510
+
1511
+ [convolutional]
1512
+ batch_normalize=1
1513
+ size=3
1514
+ stride=1
1515
+ pad=1
1516
+ filters=320
1517
+ activation=silu
1518
+
1519
+ [convolutional]
1520
+ batch_normalize=1
1521
+ filters=320
1522
+ size=1
1523
+ stride=1
1524
+ pad=1
1525
+ activation=silu
1526
+
1527
+ [convolutional]
1528
+ batch_normalize=1
1529
+ size=3
1530
+ stride=1
1531
+ pad=1
1532
+ filters=320
1533
+ activation=silu
1534
+
1535
+ [route]
1536
+ layers = -1,-8
1537
+
1538
+ # Transition last
1539
+
1540
+ # 202 (previous+3+4+2k)
1541
+ [convolutional]
1542
+ batch_normalize=1
1543
+ filters=320
1544
+ size=1
1545
+ stride=1
1546
+ pad=1
1547
+ activation=silu
1548
+
1549
+ # ============ End of Neck ============ #
1550
+
1551
+ # 203
1552
+ [implicit_add]
1553
+ filters=256
1554
+
1555
+ # 204
1556
+ [implicit_add]
1557
+ filters=384
1558
+
1559
+ # 205
1560
+ [implicit_add]
1561
+ filters=512
1562
+
1563
+ # 206
1564
+ [implicit_add]
1565
+ filters=640
1566
+
1567
+ # 207
1568
+ [implicit_mul]
1569
+ filters=255
1570
+
1571
+ # 208
1572
+ [implicit_mul]
1573
+ filters=255
1574
+
1575
+ # 209
1576
+ [implicit_mul]
1577
+ filters=255
1578
+
1579
+ # 210
1580
+ [implicit_mul]
1581
+ filters=255
1582
+
1583
+ # ============ Head ============ #
1584
+
1585
+ # YOLO-3
1586
+
1587
+ [route]
1588
+ layers = 163
1589
+
1590
+ [convolutional]
1591
+ batch_normalize=1
1592
+ size=3
1593
+ stride=1
1594
+ pad=1
1595
+ filters=256
1596
+ activation=silu
1597
+
1598
+ [shift_channels]
1599
+ from=203
1600
+
1601
+ [convolutional]
1602
+ size=1
1603
+ stride=1
1604
+ pad=1
1605
+ filters=255
1606
+ activation=linear
1607
+
1608
+ [control_channels]
1609
+ from=207
1610
+
1611
+ [yolo]
1612
+ mask = 0,1,2
1613
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1614
+ classes=80
1615
+ num=12
1616
+ jitter=.3
1617
+ ignore_thresh = .7
1618
+ truth_thresh = 1
1619
+ random=1
1620
+ scale_x_y = 1.05
1621
+ iou_thresh=0.213
1622
+ cls_normalizer=1.0
1623
+ iou_normalizer=0.07
1624
+ iou_loss=ciou
1625
+ nms_kind=greedynms
1626
+ beta_nms=0.6
1627
+
1628
+
1629
+ # YOLO-4
1630
+
1631
+ [route]
1632
+ layers = 176
1633
+
1634
+ [convolutional]
1635
+ batch_normalize=1
1636
+ size=3
1637
+ stride=1
1638
+ pad=1
1639
+ filters=384
1640
+ activation=silu
1641
+
1642
+ [shift_channels]
1643
+ from=204
1644
+
1645
+ [convolutional]
1646
+ size=1
1647
+ stride=1
1648
+ pad=1
1649
+ filters=255
1650
+ activation=linear
1651
+
1652
+ [control_channels]
1653
+ from=208
1654
+
1655
+ [yolo]
1656
+ mask = 3,4,5
1657
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1658
+ classes=80
1659
+ num=12
1660
+ jitter=.3
1661
+ ignore_thresh = .7
1662
+ truth_thresh = 1
1663
+ random=1
1664
+ scale_x_y = 1.05
1665
+ iou_thresh=0.213
1666
+ cls_normalizer=1.0
1667
+ iou_normalizer=0.07
1668
+ iou_loss=ciou
1669
+ nms_kind=greedynms
1670
+ beta_nms=0.6
1671
+
1672
+
1673
+ # YOLO-5
1674
+
1675
+ [route]
1676
+ layers = 189
1677
+
1678
+ [convolutional]
1679
+ batch_normalize=1
1680
+ size=3
1681
+ stride=1
1682
+ pad=1
1683
+ filters=512
1684
+ activation=silu
1685
+
1686
+ [shift_channels]
1687
+ from=205
1688
+
1689
+ [convolutional]
1690
+ size=1
1691
+ stride=1
1692
+ pad=1
1693
+ filters=255
1694
+ activation=linear
1695
+
1696
+ [control_channels]
1697
+ from=209
1698
+
1699
+ [yolo]
1700
+ mask = 6,7,8
1701
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1702
+ classes=80
1703
+ num=12
1704
+ jitter=.3
1705
+ ignore_thresh = .7
1706
+ truth_thresh = 1
1707
+ random=1
1708
+ scale_x_y = 1.05
1709
+ iou_thresh=0.213
1710
+ cls_normalizer=1.0
1711
+ iou_normalizer=0.07
1712
+ iou_loss=ciou
1713
+ nms_kind=greedynms
1714
+ beta_nms=0.6
1715
+
1716
+
1717
+ # YOLO-6
1718
+
1719
+ [route]
1720
+ layers = 202
1721
+
1722
+ [convolutional]
1723
+ batch_normalize=1
1724
+ size=3
1725
+ stride=1
1726
+ pad=1
1727
+ filters=640
1728
+ activation=silu
1729
+
1730
+ [shift_channels]
1731
+ from=206
1732
+
1733
+ [convolutional]
1734
+ size=1
1735
+ stride=1
1736
+ pad=1
1737
+ filters=255
1738
+ activation=linear
1739
+
1740
+ [control_channels]
1741
+ from=210
1742
+
1743
+ [yolo]
1744
+ mask = 9,10,11
1745
+ anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
1746
+ classes=80
1747
+ num=12
1748
+ jitter=.3
1749
+ ignore_thresh = .7
1750
+ truth_thresh = 1
1751
+ random=1
1752
+ scale_x_y = 1.05
1753
+ iou_thresh=0.213
1754
+ cls_normalizer=1.0
1755
+ iou_normalizer=0.07
1756
+ iou_loss=ciou
1757
+ nms_kind=greedynms
1758
+ beta_nms=0.6
1759
+
1760
+ # ============ End of Head ============ #
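The cfg blocks above finish the neck (FPN/PAN stages) and define four YOLO heads gated by the implicit_add / implicit_mul layers; the vendored models.py added later in this diff parses such a cfg with parse_config and assembles the network in create_modules. Below is a minimal, hypothetical loading sketch: it assumes that models.py exposes the upstream YOLOR Darknet class and that a compatible checkpoint exists locally, neither of which is guaranteed by this commit, and the paths are placeholders.

import torch
from asone.detectors.yolor.models.models import Darknet  # assumed upstream YOLOR class

cfg_path = "asone/detectors/yolor/cfg/yolor_p6.cfg"  # placeholder: whichever cfg this file is
model = Darknet(cfg_path, img_size=1280)             # cfg parsed into an nn.ModuleList
model.eval()

dummy = torch.zeros(1, 3, 1280, 1280)                # BCHW dummy input
with torch.no_grad():
    pred = model(dummy)[0]                           # raw predictions, before NMS
print(pred.shape)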
asone/detectors/yolor/models/__init__.py ADDED
@@ -0,0 +1 @@
1
+
asone/detectors/yolor/models/common.py ADDED
@@ -0,0 +1,1023 @@
1
+ # This file contains modules common to various models
2
+
3
+ import math
4
+
5
+ import numpy as np
6
+ import torch
7
+ import torch.nn as nn
8
+ from PIL import Image, ImageDraw
9
+
10
+ from asone.detectors.yolor.utils.datasets import letterbox
11
+ from asone.detectors.yolor.utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
12
+ from asone.detectors.yolor.utils.plots import color_list
13
+
14
+ try:
15
+ from pytorch_wavelets import DWTForward, DWTInverse
16
+
17
+ class DWT(nn.Module):
18
+ def __init__(self):
19
+ super(DWT, self).__init__()
20
+ self.xfm = DWTForward(J=1, wave='db1', mode='zero')
21
+
22
+ def forward(self, x):
23
+ b,c,w,h = x.shape
24
+ yl, yh = self.xfm(x)
25
+ return torch.cat([yl/2., yh[0].view(b,-1,w//2,h//2)/2.+.5], 1)
26
+ except Exception:  # pytorch_wavelets unavailable; fall back to the ReOrg-style DWT below
27
+
28
+ class DWT(nn.Module): # use ReOrg instead
29
+ def __init__(self):
30
+ super(DWT, self).__init__()
31
+
32
+ def forward(self, x):
33
+ return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
34
+
35
+
36
+ class ImplicitA(nn.Module):
37
+ def __init__(self, channel):
38
+ super(ImplicitA, self).__init__()
39
+ self.channel = channel
40
+ self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1))
41
+ nn.init.normal_(self.implicit, std=.02)
42
+
43
+ def forward(self, x):
44
+ return self.implicit.expand_as(x) + x
45
+
46
+
47
+ class ImplicitM(nn.Module):
48
+ def __init__(self, channel):
49
+ super(ImplicitM, self).__init__()
50
+ self.channel = channel
51
+ self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1))
52
+ nn.init.normal_(self.implicit, mean=1., std=.02)
53
+
54
+ def forward(self, x):
55
+ return self.implicit.expand_as(x) * x
56
+
57
+
58
+ class ReOrg(nn.Module):
59
+ def __init__(self):
60
+ super(ReOrg, self).__init__()
61
+
62
+ def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
63
+ return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
64
+
65
+ def autopad(k, p=None): # kernel, padding
66
+ # Pad to 'same'
67
+ if p is None:
68
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
69
+ return p
70
+
71
+
72
+ def DWConv(c1, c2, k=1, s=1, act=True):
73
+ # Depthwise convolution
74
+ return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
75
+
76
+
77
+ class Conv(nn.Module):
78
+ # Standard convolution
79
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
80
+ super(Conv, self).__init__()
81
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
82
+ self.bn = nn.BatchNorm2d(c2)
83
+ self.act = nn.SiLU() if act else nn.Identity()
84
+
85
+ def forward(self, x):
86
+ return self.act(self.bn(self.conv(x)))
87
+
88
+ def fuseforward(self, x):
89
+ return self.act(self.conv(x))
90
+
91
+
92
+ class ConvSig(nn.Module):
93
+ # Standard convolution
94
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
95
+ super(ConvSig, self).__init__()
96
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
97
+ self.act = nn.Sigmoid() if act else nn.Identity()
98
+
99
+ def forward(self, x):
100
+ return self.act(self.conv(x))
101
+
102
+ def fuseforward(self, x):
103
+ return self.act(self.conv(x))
104
+
105
+
106
+ class ConvSqu(nn.Module):
107
+ # Standard convolution
108
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
109
+ super(ConvSqu, self).__init__()
110
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
111
+ self.act = nn.SiLU() if act else nn.Identity()
112
+
113
+ def forward(self, x):
114
+ return self.act(self.conv(x))
115
+
116
+ def fuseforward(self, x):
117
+ return self.act(self.conv(x))
118
+
119
+
120
+ class Bottleneck(nn.Module):
121
+ # Standard bottleneck
122
+ def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
123
+ super(Bottleneck, self).__init__()
124
+ c_ = int(c2 * e) # hidden channels
125
+ self.cv1 = Conv(c1, c_, 1, 1)
126
+ self.cv2 = Conv(c_, c2, 3, 1, g=g)
127
+ self.add = shortcut and c1 == c2
128
+
129
+ def forward(self, x):
130
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
131
+
132
+
133
+ class BottleneckG(nn.Module):
134
+ # Standard bottleneck
135
+ def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
136
+ super(BottleneckG, self).__init__()
137
+ c_ = int(c2 * e) # hidden channels
138
+ self.cv1 = Conv(c1, c_, 1, 1, g=g)
139
+ self.cv2 = Conv(c_, c2, 3, 1, g=g)
140
+ self.add = shortcut and c1 == c2
141
+
142
+ def forward(self, x):
143
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
144
+
145
+
146
+ class BottleneckCSP(nn.Module):
147
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
148
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
149
+ super(BottleneckCSP, self).__init__()
150
+ c_ = int(c2 * e) # hidden channels
151
+ self.cv1 = Conv(c1, c_, 1, 1)
152
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
153
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
154
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
155
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
156
+ self.act = nn.SiLU()
157
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
158
+
159
+ def forward(self, x):
160
+ y1 = self.cv3(self.m(self.cv1(x)))
161
+ y2 = self.cv2(x)
162
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
163
+
164
+
165
+ class BottleneckCSPF(nn.Module):
166
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
167
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
168
+ super(BottleneckCSPF, self).__init__()
169
+ c_ = int(c2 * e) # hidden channels
170
+ self.cv1 = Conv(c1, c_, 1, 1)
171
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
172
+ #self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
173
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
174
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
175
+ self.act = nn.SiLU()
176
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
177
+
178
+ def forward(self, x):
179
+ y1 = self.m(self.cv1(x))
180
+ y2 = self.cv2(x)
181
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
182
+
183
+
184
+ class BottleneckCSPL(nn.Module):
185
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
186
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
187
+ super(BottleneckCSPL, self).__init__()
188
+ c_ = int(c2 * e) # hidden channels
189
+ self.cv1 = Conv(c1, c_, 1, 1)
190
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
191
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
192
+ #self.cv4 = Conv(2 * c_, c2, 1, 1)
193
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
194
+ self.act = nn.SiLU()
195
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
196
+
197
+ def forward(self, x):
198
+ y1 = self.cv3(self.m(self.cv1(x)))
199
+ y2 = self.cv2(x)
200
+ return self.act(self.bn(torch.cat((y1, y2), dim=1)))
201
+
202
+
203
+ class BottleneckCSPLG(nn.Module):
204
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
205
+ def __init__(self, c1, c2, n=1, shortcut=True, g=3, e=0.25): # ch_in, ch_out, number, shortcut, groups, expansion
206
+ super(BottleneckCSPLG, self).__init__()
207
+ c_ = int(c2 * e) # hidden channels
208
+ self.cv1 = Conv(c1, g*c_, 1, 1)
209
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
210
+ self.cv3 = nn.Conv2d(g*c_, g*c_, 1, 1, groups=g, bias=False)
211
+ #self.cv4 = Conv(2 * c_, c2, 1, 1)
212
+ self.bn = nn.BatchNorm2d((1+g) * c_) # applied to cat(cv2, cv3)
213
+ self.act = nn.SiLU()
214
+ self.m = nn.Sequential(*[BottleneckG(g*c_, g*c_, shortcut, g, e=1.0) for _ in range(n)])
215
+
216
+ def forward(self, x):
217
+ y1 = self.cv3(self.m(self.cv1(x)))
218
+ y2 = self.cv2(x)
219
+ return self.act(self.bn(torch.cat((y1, y2), dim=1)))
220
+
221
+
222
+ class BottleneckCSPSE(nn.Module):
223
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
224
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
225
+ super(BottleneckCSPSE, self).__init__()
226
+ c_ = int(c2 * e) # hidden channels
227
+ self.avg_pool = nn.AdaptiveAvgPool2d(1)
228
+ self.cs = ConvSqu(c1, c1//8, 1, 1)
229
+ self.cvsig = ConvSig(c1//8, c1, 1, 1)
230
+ self.cv1 = Conv(c1, c_, 1, 1)
231
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
232
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
233
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
234
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
235
+ self.act = nn.SiLU()
236
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
237
+
238
+ def forward(self, x):
239
+ x = x * self.cvsig(self.cs(self.avg_pool(x))).expand_as(x)
240
+ y1 = self.cv3(self.m(self.cv1(x)))
241
+ y2 = self.cv2(x)
242
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
243
+
244
+
245
+ class BottleneckCSPSEA(nn.Module):
246
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
247
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
248
+ super(BottleneckCSPSEA, self).__init__()
249
+ c_ = int(c2 * e) # hidden channels
250
+ self.avg_pool = nn.AdaptiveAvgPool2d(1)
251
+ self.cs = ConvSqu(c1, c1//8, 1, 1)
252
+ self.cvsig = ConvSig(c1//8, c1, 1, 1)
253
+ self.cv1 = Conv(c1, c_, 1, 1)
254
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
255
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
256
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
257
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
258
+ self.act = nn.SiLU()
259
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
260
+
261
+ def forward(self, x):
262
+ x = x + x * self.cvsig(self.cs(self.avg_pool(x))).expand_as(x)
263
+ y1 = self.cv3(self.m(self.cv1(x)))
264
+ y2 = self.cv2(x)
265
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
266
+
267
+
268
+ class BottleneckCSPSAM(nn.Module):
269
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
270
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
271
+ super(BottleneckCSPSAM, self).__init__()
272
+ c_ = int(c2 * e) # hidden channels
273
+ self.cvsig = ConvSig(c1, c1, 1, 1)
274
+ self.cv1 = Conv(c1, c_, 1, 1)
275
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
276
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
277
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
278
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
279
+ self.act = nn.SiLU()
280
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
281
+
282
+ def forward(self, x):
283
+ x = x * self.cvsig(x)
284
+ y1 = self.cv3(self.m(self.cv1(x)))
285
+ y2 = self.cv2(x)
286
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
287
+
288
+
289
+ class BottleneckCSPSAMA(nn.Module):
290
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
291
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
292
+ super(BottleneckCSPSAMA, self).__init__()
293
+ c_ = int(c2 * e) # hidden channels
294
+ self.cvsig = ConvSig(c1, c1, 1, 1)
295
+ self.cv1 = Conv(c1, c_, 1, 1)
296
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
297
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
298
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
299
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
300
+ self.act = nn.SiLU()
301
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
302
+
303
+ def forward(self, x):
304
+ x = x + x * self.cvsig(x)
305
+ y1 = self.cv3(self.m(self.cv1(x)))
306
+ y2 = self.cv2(x)
307
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
308
+
309
+
310
+ class BottleneckCSPSAMB(nn.Module):
311
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
312
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
313
+ super(BottleneckCSPSAMB, self).__init__()
314
+ c_ = int(c2 * e) # hidden channels
315
+ self.cvsig = ConvSig(c2, c2, 1, 1)
316
+ self.cv1 = Conv(c1, c_, 1, 1)
317
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
318
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
319
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
320
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
321
+ self.act = nn.SiLU()
322
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
323
+
324
+ def forward(self, x):
325
+ y1 = self.cv3(self.m(self.cv1(x)))
326
+ y2 = self.cv2(x)
327
+ y = self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
328
+ return y * self.cvsig(y)
329
+
330
+
331
+ class BottleneckCSPGC(nn.Module):
332
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
333
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
334
+ super(BottleneckCSPGC, self).__init__()
335
+ c_ = int(c2 * e) # hidden channels
336
+ self.cv1 = Conv(c1, c_, 1, 1)
337
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
338
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
339
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
340
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
341
+ self.act = nn.SiLU()
342
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
343
+
344
+ self.channel_add_conv = nn.Sequential(
345
+ nn.Conv2d(c2, c2, kernel_size=1),
346
+ nn.LayerNorm([c2, 1, 1]),
347
+ nn.ReLU(inplace=True), # yapf: disable
348
+ nn.Conv2d(c2, c2, kernel_size=1))
349
+
350
+ self.conv_mask = nn.Conv2d(c2, 1, kernel_size=1)
351
+ self.softmax = nn.Softmax(dim=2)
352
+
353
+ def spatial_pool(self, x):
354
+
355
+ batch, channel, height, width = x.size()
356
+
357
+ input_x = x
358
+ # [N, C, H * W]
359
+ input_x = input_x.view(batch, channel, height * width)
360
+ # [N, 1, C, H * W]
361
+ input_x = input_x.unsqueeze(1)
362
+ # [N, 1, H, W]
363
+ context_mask = self.conv_mask(x)
364
+ # [N, 1, H * W]
365
+ context_mask = context_mask.view(batch, 1, height * width)
366
+ # [N, 1, H * W]
367
+ context_mask = self.softmax(context_mask)
368
+ # [N, 1, H * W, 1]
369
+ context_mask = context_mask.unsqueeze(-1)
370
+ # [N, 1, C, 1]
371
+ context = torch.matmul(input_x, context_mask)
372
+ # [N, C, 1, 1]
373
+ context = context.view(batch, channel, 1, 1)
374
+
375
+ return context
376
+
377
+ def forward(self, x):
378
+ y1 = self.cv3(self.m(self.cv1(x)))
379
+ y2 = self.cv2(x)
380
+ y = self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
381
+
382
+ return y + self.channel_add_conv(self.spatial_pool(y))
383
+
384
+
385
+ class BottleneckCSPDNL(nn.Module):
386
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
387
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
388
+ super(BottleneckCSPDNL, self).__init__()
389
+ c_ = int(c2 * e) # hidden channels
390
+ self.cv1 = Conv(c1, c_, 1, 1)
391
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
392
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
393
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
394
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
395
+ self.act = nn.SiLU()
396
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
397
+
398
+
399
+ self.conv_query = nn.Conv2d(c2, c2, kernel_size=1)
400
+ self.conv_key = nn.Conv2d(c2, c2, kernel_size=1)
401
+ self.conv_value = nn.Conv2d(c2, c2, kernel_size=1, bias=False)
402
+ self.conv_out = None
403
+ self.scale = math.sqrt(c2)
404
+ self.temperature = 0.05
405
+ self.softmax = nn.Softmax(dim=2)
406
+ self.gamma = nn.Parameter(torch.zeros(1))
407
+ self.conv_mask = nn.Conv2d(c2, 1, kernel_size=1)
408
+
409
+ def forward(self, x):
410
+ y1 = self.cv3(self.m(self.cv1(x)))
411
+ y2 = self.cv2(x)
412
+ y = self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
413
+
414
+ # [N, C, T, H, W]
415
+ residual = y
416
+ # [N, C, T, H', W']
417
+ input_x = y
418
+ # [N, C', T, H, W]
419
+ query = self.conv_query(y)
420
+ # [N, C', T, H', W']
421
+ key = self.conv_key(input_x)
422
+ value = self.conv_value(input_x)
423
+ # [N, C', H x W]
424
+ query = query.view(query.size(0), query.size(1), -1)
425
+ # [N, C', H' x W']
426
+ key = key.view(key.size(0), key.size(1), -1)
427
+ value = value.view(value.size(0), value.size(1), -1)
428
+ # channel whitening
429
+ key_mean = key.mean(2).unsqueeze(2)
430
+ query_mean = query.mean(2).unsqueeze(2)
431
+ key -= key_mean
432
+ query -= query_mean
433
+ # [N, T x H x W, T x H' x W']
434
+ sim_map = torch.bmm(query.transpose(1, 2), key)
435
+ sim_map = sim_map/self.scale
436
+ sim_map = sim_map/self.temperature
437
+ sim_map = self.softmax(sim_map)
438
+ # [N, T x H x W, C']
439
+ out_sim = torch.bmm(sim_map, value.transpose(1, 2))
440
+ # [N, C', T x H x W]
441
+ out_sim = out_sim.transpose(1, 2)
442
+ # [N, C', T, H, W]
443
+ out_sim = out_sim.view(out_sim.size(0), out_sim.size(1), *y.size()[2:]).contiguous()
444
+ out_sim = self.gamma * out_sim
445
+ # [N, 1, H', W']
446
+ mask = self.conv_mask(input_x)
447
+ # [N, 1, H'x W']
448
+ mask = mask.view(mask.size(0), mask.size(1), -1)
449
+ mask = self.softmax(mask)
450
+ # [N, C, 1, 1]
451
+ out_gc = torch.bmm(value, mask.permute(0,2,1)).unsqueeze(-1).contiguous()
452
+
453
+ return out_sim + out_gc + residual
454
+
455
+
456
+ class BottleneckCSP2(nn.Module):
457
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
458
+ def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
459
+ super(BottleneckCSP2, self).__init__()
460
+ c_ = int(c2) # hidden channels
461
+ self.cv1 = Conv(c1, c_, 1, 1)
462
+ self.cv2 = nn.Conv2d(c_, c_, 1, 1, bias=False)
463
+ self.cv3 = Conv(2 * c_, c2, 1, 1)
464
+ self.bn = nn.BatchNorm2d(2 * c_)
465
+ self.act = nn.SiLU()
466
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
467
+
468
+ def forward(self, x):
469
+ x1 = self.cv1(x)
470
+ y1 = self.m(x1)
471
+ y2 = self.cv2(x1)
472
+ return self.cv3(self.act(self.bn(torch.cat((y1, y2), dim=1))))
473
+
474
+
475
+ class BottleneckCSP2SAM(nn.Module):
476
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
477
+ def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
478
+ super(BottleneckCSP2SAM, self).__init__()
479
+ c_ = int(c2) # hidden channels
480
+ self.cv1 = Conv(c1, c_, 1, 1)
481
+ self.cvsig = ConvSig(c_, c_, 1, 1)
482
+ self.cv2 = nn.Conv2d(c_, c_, 1, 1, bias=False)
483
+ self.cv3 = Conv(2 * c_, c2, 1, 1)
484
+ self.bn = nn.BatchNorm2d(2 * c_)
485
+ self.act = nn.SiLU()
486
+ self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
487
+
488
+ def forward(self, x):
489
+ x1 = self.cv1(x)
490
+ x1 = x1 * self.cvsig(x1).contiguous()
491
+ y1 = self.m(x1)
492
+ y2 = self.cv2(x1)
493
+ return self.cv3(self.act(self.bn(torch.cat((y1, y2), dim=1))))
494
+
495
+
496
+ class VoVCSP(nn.Module):
497
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
498
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
499
+ super(VoVCSP, self).__init__()
500
+ c_ = int(c2) # hidden channels
501
+ self.cv1 = Conv(c1//2, c_//2, 3, 1)
502
+ self.cv2 = Conv(c_//2, c_//2, 3, 1)
503
+ self.cv3 = Conv(c_, c2, 1, 1)
504
+
505
+ def forward(self, x):
506
+ _, x1 = x.chunk(2, dim=1)
507
+ x1 = self.cv1(x1)
508
+ x2 = self.cv2(x1)
509
+ return self.cv3(torch.cat((x1,x2), dim=1))
510
+
511
+
512
+ class SPP(nn.Module):
513
+ # Spatial pyramid pooling layer used in YOLOv3-SPP
514
+ def __init__(self, c1, c2, k=(5, 9, 13)):
515
+ super(SPP, self).__init__()
516
+ c_ = c1 // 2 # hidden channels
517
+ self.cv1 = Conv(c1, c_, 1, 1)
518
+ self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
519
+ self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
520
+
521
+ def forward(self, x):
522
+ x = self.cv1(x)
523
+ return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
524
+
525
+
526
+ class SPPCSP(nn.Module):
527
+ # CSP SPP https://github.com/WongKinYiu/CrossStagePartialNetworks
528
+ def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
529
+ super(SPPCSP, self).__init__()
530
+ c_ = int(2 * c2 * e) # hidden channels
531
+ self.cv1 = Conv(c1, c_, 1, 1)
532
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
533
+ self.cv3 = Conv(c_, c_, 3, 1)
534
+ self.cv4 = Conv(c_, c_, 1, 1)
535
+ self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
536
+ self.cv5 = Conv(4 * c_, c_, 1, 1)
537
+ self.cv6 = Conv(c_, c_, 3, 1)
538
+ self.bn = nn.BatchNorm2d(2 * c_)
539
+ self.act = nn.SiLU()
540
+ self.cv7 = Conv(2 * c_, c2, 1, 1)
541
+
542
+ def forward(self, x):
543
+ x1 = self.cv4(self.cv3(self.cv1(x)))
544
+ y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1)))
545
+ y2 = self.cv2(x)
546
+ return self.cv7(self.act(self.bn(torch.cat((y1, y2), dim=1))))
547
+
548
+
549
+ class Focus(nn.Module):
550
+ # Focus wh information into c-space
551
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
552
+ super(Focus, self).__init__()
553
+ self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
554
+
555
+ def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
556
+ return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
557
+
558
+
559
+ class MP(nn.Module):
560
+ # Max-pooling downsample layer (kernel size = stride = k)
561
+ def __init__(self, k=2):
562
+ super(MP, self).__init__()
563
+ self.m = nn.MaxPool2d(kernel_size=k, stride=k)
564
+
565
+ def forward(self, x):
566
+ return self.m(x)
567
+
568
+
569
+ class DownD(nn.Module):
570
+ # Downsampling block: 1x1 -> strided 3x3 -> 1x1 conv path summed with an average-pooled 1x1 shortcut
571
+ def __init__(self, c1, c2, n=1, k=2):
572
+ super(DownD, self).__init__()
573
+ c_ = int(c1) # hidden channels
574
+ self.cv1 = Conv(c1, c_, 1, 1)
575
+ self.cv2 = Conv(c_, c_, 3, k)
576
+ self.cv3 = Conv(c_, c2, 1, 1)
577
+ self.cv4 = Conv(c1, c2, 1, 1)
578
+ self.ap = nn.AvgPool2d(kernel_size=k, stride=k)
579
+
580
+ def forward(self, x):
581
+ return self.cv3(self.cv2(self.cv1(x))) + self.cv4(self.ap(x))
582
+
583
+
584
+ class DownC(nn.Module):
585
+ # Downsampling block: 1x1 -> strided 3x3 conv path concatenated with a max-pool -> 1x1 path
586
+ def __init__(self, c1, c2, n=1, k=2):
587
+ super(DownC, self).__init__()
588
+ c_ = int(c1) # hidden channels
589
+ self.cv1 = Conv(c1, c_, 1, 1)
590
+ self.cv2 = Conv(c_, c2//2, 3, k)
591
+ self.cv3 = Conv(c1, c2//2, 1, 1)
592
+ self.mp = nn.MaxPool2d(kernel_size=k, stride=k)
593
+
594
+ def forward(self, x):
595
+ return torch.cat((self.cv2(self.cv1(x)), self.cv3(self.mp(x))), dim=1)
596
+
597
+
598
+ class DNL(nn.Module):
599
+ # Disentangled non-local (DNL) attention block followed by a convolution
600
+ def __init__(self, c1, c2, k=3, s=1):
601
+ super(DNL, self).__init__()
602
+ c_ = int(c1) # hidden channels
603
+
604
+ #
605
+ self.conv_query = nn.Conv2d(c1, c_, kernel_size=1)
606
+ self.conv_key = nn.Conv2d(c1, c_, kernel_size=1)
607
+
608
+ self.conv_value = nn.Conv2d(c1, c1, kernel_size=1, bias=False)
609
+ self.conv_out = None
610
+
611
+ self.scale = math.sqrt(c_)
612
+ self.temperature = 0.05
613
+
614
+ self.softmax = nn.Softmax(dim=2)
615
+
616
+ self.gamma = nn.Parameter(torch.zeros(1))
617
+
618
+ self.conv_mask = nn.Conv2d(c1, 1, kernel_size=1)
619
+
620
+ self.cv = Conv(c1, c2, k, s)
621
+
622
+ def forward(self, x):
623
+
624
+ # [N, C, T, H, W]
625
+ residual = x
626
+
627
+ # [N, C, T, H', W']
628
+ input_x = x
629
+
630
+ # [N, C', T, H, W]
631
+ query = self.conv_query(x)
632
+
633
+ # [N, C', T, H', W']
634
+ key = self.conv_key(input_x)
635
+ value = self.conv_value(input_x)
636
+
637
+ # [N, C', H x W]
638
+ query = query.view(query.size(0), query.size(1), -1)
639
+
640
+ # [N, C', H' x W']
641
+ key = key.view(key.size(0), key.size(1), -1)
642
+ value = value.view(value.size(0), value.size(1), -1)
643
+
644
+ # channel whitening
645
+ key_mean = key.mean(2).unsqueeze(2)
646
+ query_mean = query.mean(2).unsqueeze(2)
647
+ key -= key_mean
648
+ query -= query_mean
649
+
650
+ # [N, T x H x W, T x H' x W']
651
+ sim_map = torch.bmm(query.transpose(1, 2), key)
652
+ sim_map = sim_map/self.scale
653
+ sim_map = sim_map/self.temperature
654
+ sim_map = self.softmax(sim_map)
655
+
656
+ # [N, T x H x W, C']
657
+ out_sim = torch.bmm(sim_map, value.transpose(1, 2))
658
+
659
+ # [N, C', T x H x W]
660
+ out_sim = out_sim.transpose(1, 2)
661
+
662
+ # [N, C', T, H, W]
663
+ out_sim = out_sim.view(out_sim.size(0), out_sim.size(1), *x.size()[2:])
664
+ out_sim = self.gamma * out_sim
665
+
666
+ # [N, 1, H', W']
667
+ mask = self.conv_mask(input_x)
668
+ # [N, 1, H'x W']
669
+ mask = mask.view(mask.size(0), mask.size(1), -1)
670
+ mask = self.softmax(mask)
671
+ # [N, C, 1, 1]
672
+ out_gc = torch.bmm(value, mask.permute(0,2,1)).unsqueeze(-1)
673
+ out_sim = out_sim+out_gc
674
+
675
+ return self.cv(out_sim + residual)
676
+
677
+
678
+ class GC(nn.Module):
679
+ # Global context (GC) attention block followed by a convolution
680
+ def __init__(self, c1, c2, k=3, s=1):
681
+ super(GC, self).__init__()
682
+ c_ = int(c1) # hidden channels
683
+
684
+ #
685
+ self.channel_add_conv = nn.Sequential(
686
+ nn.Conv2d(c1, c_, kernel_size=1),
687
+ nn.LayerNorm([c_, 1, 1]),
688
+ nn.ReLU(inplace=True), # yapf: disable
689
+ nn.Conv2d(c_, c1, kernel_size=1))
690
+
691
+ self.conv_mask = nn.Conv2d(c_, 1, kernel_size=1)
692
+ self.softmax = nn.Softmax(dim=2)
693
+
694
+ self.cv = Conv(c1, c2, k, s)
695
+
696
+
697
+ def spatial_pool(self, x):
698
+
699
+ batch, channel, height, width = x.size()
700
+
701
+ input_x = x
702
+ # [N, C, H * W]
703
+ input_x = input_x.view(batch, channel, height * width)
704
+ # [N, 1, C, H * W]
705
+ input_x = input_x.unsqueeze(1)
706
+ # [N, 1, H, W]
707
+ context_mask = self.conv_mask(x)
708
+ # [N, 1, H * W]
709
+ context_mask = context_mask.view(batch, 1, height * width)
710
+ # [N, 1, H * W]
711
+ context_mask = self.softmax(context_mask)
712
+ # [N, 1, H * W, 1]
713
+ context_mask = context_mask.unsqueeze(-1)
714
+ # [N, 1, C, 1]
715
+ context = torch.matmul(input_x, context_mask)
716
+ # [N, C, 1, 1]
717
+ context = context.view(batch, channel, 1, 1)
718
+
719
+ return context
720
+
721
+ def forward(self, x):
722
+
723
+ return self.cv(x + self.channel_add_conv(self.spatial_pool(x)))
724
+
725
+
726
+ class SAM(nn.Module):
727
+ # Spatial attention module: input gated by a sigmoid conv map, then convolved
728
+ def __init__(self, c1, c2, k=3, s=1):
729
+ super(SAM, self).__init__()
730
+ c_ = int(c1) # hidden channels
731
+ self.cvsig = ConvSig(c1, c1, 1, 1)
732
+ self.cv = Conv(c1, c2, k, s)
733
+
734
+ def forward(self, x):
735
+
736
+ return self.cv(x * self.cvsig(x))
737
+
738
+
739
+ class SAMA(nn.Module):
740
+ # Spatial attention module with residual gating (x + x * sigmoid map), then convolved
741
+ def __init__(self, c1, c2, k=3, s=1):
742
+ super(SAMA, self).__init__()
743
+ c_ = int(c1) # hidden channels
744
+ self.cvsig = ConvSig(c1, c1, 1, 1)
745
+ self.cv = Conv(c1, c2, k, s)
746
+
747
+ def forward(self, x):
748
+
749
+ return self.cv(x + x * self.cvsig(x))
750
+
751
+
752
+ class SAMB(nn.Module):
753
+ # Spatial attention module applied after the convolution (output gated by its own sigmoid map)
754
+ def __init__(self, c1, c2, k=3, s=1):
755
+ super(SAMB, self).__init__()
756
+ c_ = int(c1) # hidden channels
757
+ self.cv = Conv(c1, c2, k, s)
758
+ self.cvsig = ConvSig(c2, c2, 1, 1)
759
+
760
+ def forward(self, x):
761
+
762
+ x = self.cv(x)
763
+
764
+ return x * self.cvsig(x)
765
+
766
+
767
+ class Concat(nn.Module):
768
+ # Concatenate a list of tensors along dimension
769
+ def __init__(self, dimension=1):
770
+ super(Concat, self).__init__()
771
+ self.d = dimension
772
+
773
+ def forward(self, x):
774
+ return torch.cat(x, self.d)
775
+
776
+
777
+ class NMS(nn.Module):
778
+ # Non-Maximum Suppression (NMS) module
779
+ conf = 0.25 # confidence threshold
780
+ iou = 0.45 # IoU threshold
781
+ classes = None # (optional list) filter by class
782
+
783
+ def __init__(self):
784
+ super(NMS, self).__init__()
785
+
786
+ def forward(self, x):
787
+ return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
788
+
789
+
790
+ class autoShape(nn.Module):
791
+ # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
792
+ img_size = 640 # inference size (pixels)
793
+ conf = 0.25 # NMS confidence threshold
794
+ iou = 0.45 # NMS IoU threshold
795
+ classes = None # (optional list) filter by class
796
+
797
+ def __init__(self, model):
798
+ super(autoShape, self).__init__()
799
+ self.model = model.eval()
800
+
801
+ def forward(self, imgs, size=640, augment=False, profile=False):
802
+ # supports inference from various sources. For height=720, width=1280, RGB images example inputs are:
803
+ # opencv: imgs = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(720,1280,3)
804
+ # PIL: imgs = Image.open('image.jpg') # HWC x(720,1280,3)
805
+ # numpy: imgs = np.zeros((720,1280,3)) # HWC
806
+ # torch: imgs = torch.zeros(16,3,720,1280) # BCHW
807
+ # multiple: imgs = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
808
+
809
+ p = next(self.model.parameters()) # for device and type
810
+ if isinstance(imgs, torch.Tensor): # torch
811
+ return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference
812
+
813
+ # Pre-process
814
+ if not isinstance(imgs, list):
815
+ imgs = [imgs]
816
+ shape0, shape1 = [], [] # image and inference shapes
817
+ batch = range(len(imgs)) # batch size
818
+ for i in batch:
819
+ imgs[i] = np.array(imgs[i]) # to numpy
820
+ imgs[i] = imgs[i][:, :, :3] if imgs[i].ndim == 3 else np.tile(imgs[i][:, :, None], 3) # enforce 3ch input
821
+ s = imgs[i].shape[:2] # HWC
822
+ shape0.append(s) # image shape
823
+ g = (size / max(s)) # gain
824
+ shape1.append([y * g for y in s])
825
+ shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape
826
+ x = [letterbox(imgs[i], new_shape=shape1, auto=False)[0] for i in batch] # pad
827
+ x = np.stack(x, 0) if batch[-1] else x[0][None] # stack
828
+ x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW
829
+ x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32
830
+
831
+ # Inference
832
+ with torch.no_grad():
833
+ y = self.model(x, augment, profile)[0] # forward
834
+ y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS
835
+
836
+ # Post-process
837
+ for i in batch:
838
+ if y[i] is not None:
839
+ y[i][:, :4] = scale_coords(shape1, y[i][:, :4], shape0[i])
840
+
841
+ return Detections(imgs, y, self.names)
842
+
843
+
844
+ class Detections:
845
+ # detections class for YOLOv5 inference results
846
+ def __init__(self, imgs, pred, names=None):
847
+ super(Detections, self).__init__()
848
+ self.imgs = imgs # list of images as numpy arrays
849
+ self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
850
+ self.names = names # class names
851
+ self.xyxy = pred # xyxy pixels
852
+ self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
853
+ gn = [torch.Tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.]) for im in imgs] # normalization gains
854
+ self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
855
+ self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
856
+
857
+ def display(self, pprint=False, show=False, save=False):
858
+ colors = color_list()
859
+ for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
860
+ str = f'Image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
861
+ if pred is not None:
862
+ for c in pred[:, -1].unique():
863
+ n = (pred[:, -1] == c).sum() # detections per class
864
+ str += f'{n} {self.names[int(c)]}s, ' # add to string
865
+ if show or save:
866
+ img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np
867
+ for *box, conf, cls in pred: # xyxy, confidence, class
868
+ # str += '%s %.2f, ' % (names[int(cls)], conf) # label
869
+ ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10]) # plot
870
+ if save:
871
+ f = f'results{i}.jpg'
872
+ str += f"saved to '{f}'"
873
+ img.save(f) # save
874
+ if show:
875
+ img.show(f'Image {i}') # show
876
+ if pprint:
877
+ print(str)
878
+
879
+ def print(self):
880
+ self.display(pprint=True) # print results
881
+
882
+ def show(self):
883
+ self.display(show=True) # show results
884
+
885
+ def save(self):
886
+ self.display(save=True) # save results
887
+
888
+
889
+ class Flatten(nn.Module):
890
+ # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
891
+ @staticmethod
892
+ def forward(x):
893
+ return x.view(x.size(0), -1)
894
+
895
+
896
+ class Classify(nn.Module):
897
+ # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
898
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
899
+ super(Classify, self).__init__()
900
+ self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
901
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) # to x(b,c2,1,1)
902
+ self.flat = Flatten()
903
+
904
+ def forward(self, x):
905
+ z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
906
+ return self.flat(self.conv(z)) # flatten to x(b,c2)
907
+
908
+
909
+ class TransformerLayer(nn.Module):
910
+ def __init__(self, c, num_heads):
911
+ super().__init__()
912
+
913
+ self.ln1 = nn.LayerNorm(c)
914
+ self.q = nn.Linear(c, c, bias=False)
915
+ self.k = nn.Linear(c, c, bias=False)
916
+ self.v = nn.Linear(c, c, bias=False)
917
+ self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
918
+ self.ln2 = nn.LayerNorm(c)
919
+ self.fc1 = nn.Linear(c, c, bias=False)
920
+ self.fc2 = nn.Linear(c, c, bias=False)
921
+
922
+ def forward(self, x):
923
+ x_ = self.ln1(x)
924
+ x = self.ma(self.q(x_), self.k(x_), self.v(x_))[0] + x
925
+ x = self.ln2(x)
926
+ x = self.fc2(self.fc1(x)) + x
927
+ return x
928
+
929
+
930
+ class TransformerBlock(nn.Module):
931
+ def __init__(self, c1, c2, num_heads, num_layers):
932
+ super().__init__()
933
+
934
+ self.conv = None
935
+ if c1 != c2:
936
+ self.conv = Conv(c1, c2)
937
+ self.linear = nn.Linear(c2, c2)
938
+ self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
939
+ self.c2 = c2
940
+
941
+ def forward(self, x):
942
+ if self.conv is not None:
943
+ x = self.conv(x)
944
+ b, _, w, h = x.shape
945
+ p = x.flatten(2)
946
+ p = p.unsqueeze(0)
947
+ p = p.transpose(0, 3)
948
+ p = p.squeeze(3)
949
+ e = self.linear(p)
950
+ x = p + e
951
+
952
+ x = self.tr(x)
953
+ x = x.unsqueeze(3)
954
+ x = x.transpose(0, 3)
955
+ x = x.reshape(b, self.c2, w, h)
956
+ return x
957
+
958
+
959
+
960
+ class BottleneckCSPTR(nn.Module):
961
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
962
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
963
+ super(BottleneckCSPTR, self).__init__()
964
+ c_ = int(c2 * e) # hidden channels
965
+ self.cv1 = Conv(c1, c_, 1, 1)
966
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
967
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
968
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
969
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
970
+ self.act = nn.SiLU()
971
+ self.m = TransformerBlock(c_, c_, 4, n)
972
+
973
+ def forward(self, x):
974
+ y1 = self.cv3(self.m(self.cv1(x)))
975
+ y2 = self.cv2(x)
976
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
977
+
978
+ class BottleneckCSP2TR(nn.Module):
979
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
980
+ def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
981
+ super(BottleneckCSP2TR, self).__init__()
982
+ c_ = int(c2) # hidden channels
983
+ self.cv1 = Conv(c1, c_, 1, 1)
984
+ self.cv2 = nn.Conv2d(c_, c_, 1, 1, bias=False)
985
+ self.cv3 = Conv(2 * c_, c2, 1, 1)
986
+ self.bn = nn.BatchNorm2d(2 * c_)
987
+ self.act = nn.SiLU()
988
+ self.m = TransformerBlock(c_, c_, 4, n)
989
+
990
+ def forward(self, x):
991
+ x1 = self.cv1(x)
992
+ y1 = self.m(x1)
993
+ y2 = self.cv2(x1)
994
+ return self.cv3(self.act(self.bn(torch.cat((y1, y2), dim=1))))
995
+
996
+
997
+ class SPPCSPTR(nn.Module):
998
+ # CSP SPP https://github.com/WongKinYiu/CrossStagePartialNetworks
999
+ def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
1000
+ super(SPPCSPTR, self).__init__()
1001
+ c_ = int(2 * c2 * e) # hidden channels
1002
+ self.cv1 = Conv(c1, c_, 1, 1)
1003
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
1004
+ self.cv3 = Conv(c_, c_, 3, 1)
1005
+ self.cv4 = Conv(c_, c_, 1, 1)
1006
+ self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
1007
+ self.cv5 = Conv(4 * c_, c_, 1, 1)
1008
+ self.cv6 = TransformerBlock(c_, c_, 4, 1)
1009
+ self.bn = nn.BatchNorm2d(2 * c_)
1010
+ self.act = nn.SiLU()
1011
+ self.cv7 = Conv(2 * c_, c2, 1, 1)
1012
+
1013
+ def forward(self, x):
1014
+ x1 = self.cv4(self.cv3(self.cv1(x)))
1015
+ y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1)))
1016
+ y2 = self.cv2(x)
1017
+ return self.cv7(self.act(self.bn(torch.cat((y1, y2), dim=1))))
1018
+
1019
+ class TR(BottleneckCSPTR):
1020
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
1021
+ super().__init__(c1, c2, n, shortcut, g, e)
1022
+ c_ = int(c2 * e)
1023
+ self.m = TransformerBlock(c_, c_, 4, n)
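common.py above collects the YOLOR building blocks (Conv, the CSP bottleneck family, SPP variants, attention blocks, ImplicitA/ImplicitM, the autoShape/Detections inference wrappers), with its utils imports repointed at the asone.detectors.yolor package. A minimal shape-check sketch, assuming the vendored package and its dependencies are importable as laid out in this commit:

import torch
from asone.detectors.yolor.models.common import Conv, ReOrg, ImplicitA, ImplicitM

x = torch.randn(1, 3, 64, 64)
print(ReOrg()(x).shape)                        # torch.Size([1, 12, 32, 32]): space-to-depth
print(Conv(3, 16, k=3, s=2)(x).shape)          # torch.Size([1, 16, 32, 32]): conv + BN + SiLU
y = torch.randn(1, 16, 32, 32)
print(ImplicitM(16)(ImplicitA(16)(y)).shape)   # torch.Size([1, 16, 32, 32]): implicit add, then implicit mul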
asone/detectors/yolor/models/export.py ADDED
@@ -0,0 +1,68 @@
1
+ import argparse
2
+
3
+ import torch
4
+
5
+ from asone.detectors.yolor.utils.google_utils import attempt_download
6
+
7
+ if __name__ == '__main__':
8
+ parser = argparse.ArgumentParser()
9
+ parser.add_argument('--weights', type=str, default='./yolov4.pt', help='weights path')
10
+ parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
11
+ parser.add_argument('--batch-size', type=int, default=1, help='batch size')
12
+ opt = parser.parse_args()
13
+ opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
14
+ print(opt)
15
+
16
+ # Input
17
+ img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection
18
+
19
+ # Load PyTorch model
20
+ attempt_download(opt.weights)
21
+ model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
22
+ model.eval()
23
+ model.model[-1].export = True # set Detect() layer export=True
24
+ y = model(img) # dry run
25
+
26
+ # TorchScript export
27
+ try:
28
+ print('\nStarting TorchScript export with torch %s...' % torch.__version__)
29
+ f = opt.weights.replace('.pt', '.torchscript.pt') # filename
30
+ ts = torch.jit.trace(model, img)
31
+ ts.save(f)
32
+ print('TorchScript export success, saved as %s' % f)
33
+ except Exception as e:
34
+ print('TorchScript export failure: %s' % e)
35
+
36
+ # ONNX export
37
+ try:
38
+ import onnx
39
+
40
+ print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
41
+ f = opt.weights.replace('.pt', '.onnx') # filename
42
+ model.fuse() # only for ONNX
43
+ torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
44
+ output_names=['classes', 'boxes'] if y is None else ['output'])
45
+
46
+ # Checks
47
+ onnx_model = onnx.load(f) # load onnx model
48
+ onnx.checker.check_model(onnx_model) # check onnx model
49
+ print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
50
+ print('ONNX export success, saved as %s' % f)
51
+ except Exception as e:
52
+ print('ONNX export failure: %s' % e)
53
+
54
+ # CoreML export
55
+ try:
56
+ import coremltools as ct
57
+
58
+ print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
59
+ # convert model from torchscript and apply pixel scaling as per detect.py
60
+ model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
61
+ f = opt.weights.replace('.pt', '.mlmodel') # filename
62
+ model.save(f)
63
+ print('CoreML export success, saved as %s' % f)
64
+ except Exception as e:
65
+ print('CoreML export failure: %s' % e)
66
+
67
+ # Finish
68
+ print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
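export.py above loads a .pt checkpoint, flips the Detect layer's export flag, and then tries TorchScript, ONNX (opset 12) and CoreML exports in turn, each wrapped in try/except so a missing onnx or coremltools install only skips that format. Run as a module from the repository root, the invocation would look roughly like this (the weights path is a placeholder):

    python -m asone.detectors.yolor.models.export --weights path/to/checkpoint.pt --img-size 1280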
asone/detectors/yolor/models/models.py ADDED
@@ -0,0 +1,761 @@
1
+ from asone.detectors.yolor.utils.google_utils import *
2
+ from asone.detectors.yolor.utils.layers import *
3
+ from asone.detectors.yolor.utils.parse_config import *
4
+ from asone.detectors.yolor.utils import torch_utils
5
+
6
+ ONNX_EXPORT = False
7
+
8
+
9
+ def create_modules(module_defs, img_size, cfg):
10
+ # Constructs module list of layer blocks from module configuration in module_defs
11
+
12
+ img_size = [img_size] * 2 if isinstance(img_size, int) else img_size # expand if necessary
13
+ _ = module_defs.pop(0) # cfg training hyperparams (unused)
14
+ output_filters = [3] # input channels
15
+ module_list = nn.ModuleList()
16
+ routs = [] # list of layers which rout to deeper layers
17
+ yolo_index = -1
18
+
19
+ for i, mdef in enumerate(module_defs):
20
+ modules = nn.Sequential()
21
+
22
+ if mdef['type'] == 'convolutional':
23
+ bn = mdef['batch_normalize']
24
+ filters = mdef['filters']
25
+ k = mdef['size'] # kernel size
26
+ stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x'])
27
+ if isinstance(k, int): # single-size conv
28
+ modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1],
29
+ out_channels=filters,
30
+ kernel_size=k,
31
+ stride=stride,
32
+ padding=k // 2 if mdef['pad'] else 0,
33
+ groups=mdef['groups'] if 'groups' in mdef else 1,
34
+ bias=not bn))
35
+ else: # multiple-size conv
36
+ modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1],
37
+ out_ch=filters,
38
+ k=k,
39
+ stride=stride,
40
+ bias=not bn))
41
+
42
+ if bn:
43
+ modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4))
44
+ else:
45
+ routs.append(i) # detection output (goes into yolo layer)
46
+
47
+ if mdef['activation'] == 'leaky': # activation study https://github.com/ultralytics/yolov3/issues/441
48
+ modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True))
49
+ elif mdef['activation'] == 'swish':
50
+ modules.add_module('activation', Swish())
51
+ elif mdef['activation'] == 'mish':
52
+ modules.add_module('activation', Mish())
53
+ elif mdef['activation'] == 'emb':
54
+ modules.add_module('activation', F.normalize())
55
+ elif mdef['activation'] == 'logistic':
56
+ modules.add_module('activation', nn.Sigmoid())
57
+ elif mdef['activation'] == 'silu':
58
+ modules.add_module('activation', nn.SiLU())
59
+
60
+ elif mdef['type'] == 'deformableconvolutional':
61
+ bn = mdef['batch_normalize']
62
+ filters = mdef['filters']
63
+ k = mdef['size'] # kernel size
64
+ stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x'])
65
+ if isinstance(k, int): # single-size conv
66
+ modules.add_module('DeformConv2d', DeformConv2d(output_filters[-1],
67
+ filters,
68
+ kernel_size=k,
69
+ padding=k // 2 if mdef['pad'] else 0,
70
+ stride=stride,
71
+ bias=not bn,
72
+ modulation=True))
73
+ else: # multiple-size conv
74
+ modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1],
75
+ out_ch=filters,
76
+ k=k,
77
+ stride=stride,
78
+ bias=not bn))
79
+
80
+ if bn:
81
+ modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4))
82
+ else:
83
+ routs.append(i) # detection output (goes into yolo layer)
84
+
85
+ if mdef['activation'] == 'leaky': # activation study https://github.com/ultralytics/yolov3/issues/441
86
+ modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True))
87
+ elif mdef['activation'] == 'swish':
88
+ modules.add_module('activation', Swish())
89
+ elif mdef['activation'] == 'mish':
90
+ modules.add_module('activation', Mish())
91
+ elif mdef['activation'] == 'silu':
92
+ modules.add_module('activation', nn.SiLU())
93
+
94
+ elif mdef['type'] == 'dropout':
95
+ p = mdef['probability']
96
+ modules = nn.Dropout(p)
97
+
98
+ elif mdef['type'] == 'avgpool':
99
+ modules = GAP()
100
+
101
+ elif mdef['type'] == 'silence':
102
+ filters = output_filters[-1]
103
+ modules = Silence()
104
+
105
+ elif mdef['type'] == 'scale_channels': # nn.Sequential() placeholder for 'shortcut' layer
106
+ layers = mdef['from']
107
+ filters = output_filters[-1]
108
+ routs.extend([i + l if l < 0 else l for l in layers])
109
+ modules = ScaleChannel(layers=layers)
110
+
111
+ elif mdef['type'] == 'shift_channels': # nn.Sequential() placeholder for 'shortcut' layer
112
+ layers = mdef['from']
113
+ filters = output_filters[-1]
114
+ routs.extend([i + l if l < 0 else l for l in layers])
115
+ modules = ShiftChannel(layers=layers)
116
+
117
+ elif mdef['type'] == 'shift_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer
118
+ layers = mdef['from']
119
+ filters = output_filters[-1]
120
+ routs.extend([i + l if l < 0 else l for l in layers])
121
+ modules = ShiftChannel2D(layers=layers)
122
+
123
+ elif mdef['type'] == 'control_channels': # nn.Sequential() placeholder for 'shortcut' layer
124
+ layers = mdef['from']
125
+ filters = output_filters[-1]
126
+ routs.extend([i + l if l < 0 else l for l in layers])
127
+ modules = ControlChannel(layers=layers)
128
+
129
+ elif mdef['type'] == 'control_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer
130
+ layers = mdef['from']
131
+ filters = output_filters[-1]
132
+ routs.extend([i + l if l < 0 else l for l in layers])
133
+ modules = ControlChannel2D(layers=layers)
134
+
135
+ elif mdef['type'] == 'alternate_channels': # nn.Sequential() placeholder for 'shortcut' layer
136
+ layers = mdef['from']
137
+ filters = output_filters[-1] * 2
138
+ routs.extend([i + l if l < 0 else l for l in layers])
139
+ modules = AlternateChannel(layers=layers)
140
+
141
+ elif mdef['type'] == 'alternate_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer
142
+ layers = mdef['from']
143
+ filters = output_filters[-1] * 2
144
+ routs.extend([i + l if l < 0 else l for l in layers])
145
+ modules = AlternateChannel2D(layers=layers)
146
+
147
+ elif mdef['type'] == 'select_channels': # nn.Sequential() placeholder for 'shortcut' layer
148
+ layers = mdef['from']
149
+ filters = output_filters[-1]
150
+ routs.extend([i + l if l < 0 else l for l in layers])
151
+ modules = SelectChannel(layers=layers)
152
+
153
+ elif mdef['type'] == 'select_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer
154
+ layers = mdef['from']
155
+ filters = output_filters[-1]
156
+ routs.extend([i + l if l < 0 else l for l in layers])
157
+ modules = SelectChannel2D(layers=layers)
158
+
159
+ elif mdef['type'] == 'sam': # nn.Sequential() placeholder for 'shortcut' layer
160
+ layers = mdef['from']
161
+ filters = output_filters[-1]
162
+ routs.extend([i + l if l < 0 else l for l in layers])
163
+ modules = ScaleSpatial(layers=layers)
164
+
165
+ elif mdef['type'] == 'BatchNorm2d':
166
+ filters = output_filters[-1]
167
+ modules = nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4)
168
+ if i == 0 and filters == 3: # normalize RGB image
169
+ # imagenet mean and var https://pytorch.org/docs/stable/torchvision/models.html#classification
170
+ modules.running_mean = torch.tensor([0.485, 0.456, 0.406])
171
+ modules.running_var = torch.tensor([0.0524, 0.0502, 0.0506])
172
+
173
+ elif mdef['type'] == 'maxpool':
174
+ k = mdef['size'] # kernel size
175
+ stride = mdef['stride']
176
+ maxpool = nn.MaxPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2)
177
+ if k == 2 and stride == 1: # yolov3-tiny
178
+ modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1)))
179
+ modules.add_module('MaxPool2d', maxpool)
180
+ else:
181
+ modules = maxpool
182
+
183
+ elif mdef['type'] == 'local_avgpool':
184
+ k = mdef['size'] # kernel size
185
+ stride = mdef['stride']
186
+ avgpool = nn.AvgPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2)
187
+ if k == 2 and stride == 1: # yolov3-tiny
188
+ modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1)))
189
+ modules.add_module('AvgPool2d', avgpool)
190
+ else:
191
+ modules = avgpool
192
+
193
+ elif mdef['type'] == 'upsample':
194
+ if ONNX_EXPORT: # explicitly state size, avoid scale_factor
195
+ g = (yolo_index + 1) * 2 / 32 # gain
196
+ modules = nn.Upsample(size=tuple(int(x * g) for x in img_size)) # img_size = (320, 192)
197
+ else:
198
+ modules = nn.Upsample(scale_factor=mdef['stride'])
199
+
200
+ elif mdef['type'] == 'route': # nn.Sequential() placeholder for 'route' layer
201
+ layers = mdef['layers']
202
+ filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])
203
+ routs.extend([i + l if l < 0 else l for l in layers])
204
+ modules = FeatureConcat(layers=layers)
205
+
206
+ elif mdef['type'] == 'route2': # nn.Sequential() placeholder for 'route' layer
207
+ layers = mdef['layers']
208
+ filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])
209
+ routs.extend([i + l if l < 0 else l for l in layers])
210
+ modules = FeatureConcat2(layers=layers)
211
+
212
+ elif mdef['type'] == 'route3': # nn.Sequential() placeholder for 'route' layer
213
+ layers = mdef['layers']
214
+ filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])
215
+ routs.extend([i + l if l < 0 else l for l in layers])
216
+ modules = FeatureConcat3(layers=layers)
217
+
218
+ elif mdef['type'] == 'route_lhalf': # nn.Sequential() placeholder for 'route' layer
219
+ layers = mdef['layers']
220
+ filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])//2
221
+ routs.extend([i + l if l < 0 else l for l in layers])
222
+ modules = FeatureConcat_l(layers=layers)
223
+
224
+ elif mdef['type'] == 'shortcut': # nn.Sequential() placeholder for 'shortcut' layer
225
+ layers = mdef['from']
226
+ filters = output_filters[-1]
227
+ routs.extend([i + l if l < 0 else l for l in layers])
228
+ modules = WeightedFeatureFusion(layers=layers, weight='weights_type' in mdef)
229
+
230
+ elif mdef['type'] == 'reorg3d': # yolov3-spp-pan-scale
231
+ pass
232
+
233
+ elif mdef['type'] == 'reorg': # yolov3-spp-pan-scale
234
+ filters = 4 * output_filters[-1]
235
+ modules.add_module('Reorg', Reorg())
236
+
237
+ elif mdef['type'] == 'dwt': # yolov3-spp-pan-scale
238
+ filters = 4 * output_filters[-1]
239
+ modules.add_module('DWT', DWT())
240
+
241
+ elif mdef['type'] == 'implicit_add': # yolov3-spp-pan-scale
242
+ filters = mdef['filters']
243
+ modules = ImplicitA(channel=filters)
244
+
245
+ elif mdef['type'] == 'implicit_mul': # yolov3-spp-pan-scale
246
+ filters = mdef['filters']
247
+ modules = ImplicitM(channel=filters)
248
+
249
+ elif mdef['type'] == 'implicit_cat': # yolov3-spp-pan-scale
250
+ filters = mdef['filters']
251
+ modules = ImplicitC(channel=filters)
252
+
253
+ elif mdef['type'] == 'implicit_add_2d': # yolov3-spp-pan-scale
254
+ channels = mdef['filters']
255
+ filters = mdef['atoms']
256
+ modules = Implicit2DA(atom=filters, channel=channels)
257
+
258
+ elif mdef['type'] == 'implicit_mul_2d': # yolov3-spp-pan-scale
259
+ channels = mdef['filters']
260
+ filters = mdef['atoms']
261
+ modules = Implicit2DM(atom=filters, channel=channels)
262
+
263
+ elif mdef['type'] == 'implicit_cat_2d': # yolov3-spp-pan-scale
264
+ channels = mdef['filters']
265
+ filters = mdef['atoms']
266
+ modules = Implicit2DC(atom=filters, channel=channels)
267
+
268
+ elif mdef['type'] == 'yolo':
269
+ yolo_index += 1
270
+ stride = [8, 16, 32, 64, 128] # P3, P4, P5, P6, P7 strides
271
+ if any(x in cfg for x in ['yolov4-tiny', 'fpn', 'yolov3']): # P5, P4, P3 strides
272
+ stride = [32, 16, 8]
273
+ layers = mdef['from'] if 'from' in mdef else []
274
+ modules = YOLOLayer(anchors=mdef['anchors'][mdef['mask']], # anchor list
275
+ nc=mdef['classes'], # number of classes
276
+ img_size=img_size, # (416, 416)
277
+ yolo_index=yolo_index, # 0, 1, 2...
278
+ layers=layers, # output layers
279
+ stride=stride[yolo_index])
280
+
281
+ # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3)
282
+ try:
283
+ j = layers[yolo_index] if 'from' in mdef else -2
284
+ bias_ = module_list[j][0].bias # shape(255,)
285
+ bias = bias_[:modules.no * modules.na].view(modules.na, -1) # shape(3,85)
286
+ #bias[:, 4] += -4.5 # obj
287
+ bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) # obj (8 objects per 640 image)
288
+ bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) # cls (sigmoid(p) = 1/nc)
289
+ module_list[j][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad)
290
+
291
+ #j = [-2, -5, -8]
292
+ #for sj in j:
293
+ # bias_ = module_list[sj][0].bias
294
+ # bias = bias_[:modules.no * 1].view(1, -1)
295
+ # bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2)
296
+ # bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99))
297
+ # module_list[sj][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad)
298
+ except:
299
+ print('WARNING: smart bias initialization failure.')
300
+
301
+ elif mdef['type'] == 'jde':
302
+ yolo_index += 1
303
+ stride = [8, 16, 32, 64, 128] # P3, P4, P5, P6, P7 strides
304
+ if any(x in cfg for x in ['yolov4-tiny', 'fpn', 'yolov3']): # P5, P4, P3 strides
305
+ stride = [32, 16, 8]
306
+ layers = mdef['from'] if 'from' in mdef else []
307
+ modules = JDELayer(anchors=mdef['anchors'][mdef['mask']], # anchor list
308
+ nc=mdef['classes'], # number of classes
309
+ img_size=img_size, # (416, 416)
310
+ yolo_index=yolo_index, # 0, 1, 2...
311
+ layers=layers, # output layers
312
+ stride=stride[yolo_index])
313
+
314
+ # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3)
315
+ try:
316
+ j = layers[yolo_index] if 'from' in mdef else -1
317
+ bias_ = module_list[j][0].bias # shape(255,)
318
+ bias = bias_[:modules.no * modules.na].view(modules.na, -1) # shape(3,85)
319
+ #bias[:, 4] += -4.5 # obj
320
+ bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) # obj (8 objects per 640 image)
321
+ bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) # cls (sigmoid(p) = 1/nc)
322
+ module_list[j][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad)
323
+ except:
324
+ print('WARNING: smart bias initialization failure.')
325
+
326
+ else:
327
+ print('Warning: Unrecognized Layer Type: ' + mdef['type'])
328
+
329
+ # Register module list and number of output filters
330
+ module_list.append(modules)
331
+ output_filters.append(filters)
332
+
333
+ routs_binary = [False] * (i + 1)
334
+ for i in routs:
335
+ routs_binary[i] = True
336
+ return module_list, routs_binary
337
+
338
+
339
+ class YOLOLayer(nn.Module):
340
+ def __init__(self, anchors, nc, img_size, yolo_index, layers, stride):
341
+ super(YOLOLayer, self).__init__()
342
+ self.anchors = torch.Tensor(anchors)
343
+ self.index = yolo_index # index of this layer in layers
344
+ self.layers = layers # model output layer indices
345
+ self.stride = stride # layer stride
346
+ self.nl = len(layers) # number of output layers (3)
347
+ self.na = len(anchors) # number of anchors (3)
348
+ self.nc = nc # number of classes (80)
349
+ self.no = nc + 5 # number of outputs (85)
350
+ self.nx, self.ny, self.ng = 0, 0, 0 # initialize number of x, y gridpoints
351
+ self.anchor_vec = self.anchors / self.stride
352
+ self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2)
353
+
354
+ if ONNX_EXPORT:
355
+ self.training = False
356
+ self.create_grids((img_size[1] // stride, img_size[0] // stride)) # number x, y grid points
357
+
358
+ def create_grids(self, ng=(13, 13), device='cpu'):
359
+ self.nx, self.ny = ng # x and y grid size
360
+ self.ng = torch.tensor(ng, dtype=torch.float)
361
+
362
+ # build xy offsets
363
+ if not self.training:
364
+ yv, xv = torch.meshgrid([torch.arange(self.ny, device=device), torch.arange(self.nx, device=device)])
365
+ self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float()
366
+
367
+ if self.anchor_vec.device != device:
368
+ self.anchor_vec = self.anchor_vec.to(device)
369
+ self.anchor_wh = self.anchor_wh.to(device)
370
+
371
+ def forward(self, p, out):
372
+ ASFF = False # https://arxiv.org/abs/1911.09516
373
+ if ASFF:
374
+ i, n = self.index, self.nl # index in layers, number of layers
375
+ p = out[self.layers[i]]
376
+ bs, _, ny, nx = p.shape # bs, 255, 13, 13
377
+ if (self.nx, self.ny) != (nx, ny):
378
+ self.create_grids((nx, ny), p.device)
379
+
380
+ # outputs and weights
381
+ # w = F.softmax(p[:, -n:], 1) # normalized weights
382
+ w = torch.sigmoid(p[:, -n:]) * (2 / n) # sigmoid weights (faster)
383
+ # w = w / w.sum(1).unsqueeze(1) # normalize across layer dimension
384
+
385
+ # weighted ASFF sum
386
+ p = out[self.layers[i]][:, :-n] * w[:, i:i + 1]
387
+ for j in range(n):
388
+ if j != i:
389
+ p += w[:, j:j + 1] * \
390
+ F.interpolate(out[self.layers[j]][:, :-n], size=[ny, nx], mode='bilinear', align_corners=False)
391
+
392
+ elif ONNX_EXPORT:
393
+ bs = 1 # batch size
394
+ else:
395
+ bs, _, ny, nx = p.shape # bs, 255, 13, 13
396
+ if (self.nx, self.ny) != (nx, ny):
397
+ self.create_grids((nx, ny), p.device)
398
+
399
+ # p.view(bs, 255, 13, 13) -> (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh)
400
+ p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction
401
+
402
+ if self.training:
403
+ return p
404
+
405
+ elif ONNX_EXPORT:
406
+ # Avoid broadcasting for ANE operations
407
+ m = self.na * self.nx * self.ny
408
+ ng = 1. / self.ng.repeat(m, 1)
409
+ grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2)
410
+ anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng
411
+
412
+ p = p.view(m, self.no)
413
+ xy = torch.sigmoid(p[:, 0:2]) + grid # x, y
414
+ wh = torch.exp(p[:, 2:4]) * anchor_wh # width, height
415
+ p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \
416
+ torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5]) # conf
417
+ return p_cls, xy * ng, wh
418
+
419
+ else: # inference
420
+ io = p.sigmoid()
421
+ io[..., :2] = (io[..., :2] * 2. - 0.5 + self.grid)
422
+ io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * self.anchor_wh
423
+ io[..., :4] *= self.stride
424
+ #io = p.clone() # inference output
425
+ #io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid # xy
426
+ #io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method
427
+ #io[..., :4] *= self.stride
428
+ #torch.sigmoid_(io[..., 4:])
429
+ return io.view(bs, -1, self.no), p # view [1, 3, 13, 13, 85] as [1, 507, 85]
430
+
431
+
432
+ class JDELayer(nn.Module):
433
+ def __init__(self, anchors, nc, img_size, yolo_index, layers, stride):
434
+ super(JDELayer, self).__init__()
435
+ self.anchors = torch.Tensor(anchors)
436
+ self.index = yolo_index # index of this layer in layers
437
+ self.layers = layers # model output layer indices
438
+ self.stride = stride # layer stride
439
+ self.nl = len(layers) # number of output layers (3)
440
+ self.na = len(anchors) # number of anchors (3)
441
+ self.nc = nc # number of classes (80)
442
+ self.no = nc + 5 # number of outputs (85)
443
+ self.nx, self.ny, self.ng = 0, 0, 0 # initialize number of x, y gridpoints
444
+ self.anchor_vec = self.anchors / self.stride
445
+ self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2)
446
+
447
+ if ONNX_EXPORT:
448
+ self.training = False
449
+ self.create_grids((img_size[1] // stride, img_size[0] // stride)) # number x, y grid points
450
+
451
+ def create_grids(self, ng=(13, 13), device='cpu'):
452
+ self.nx, self.ny = ng # x and y grid size
453
+ self.ng = torch.tensor(ng, dtype=torch.float)
454
+
455
+ # build xy offsets
456
+ if not self.training:
457
+ yv, xv = torch.meshgrid([torch.arange(self.ny, device=device), torch.arange(self.nx, device=device)])
458
+ self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float()
459
+
460
+ if self.anchor_vec.device != device:
461
+ self.anchor_vec = self.anchor_vec.to(device)
462
+ self.anchor_wh = self.anchor_wh.to(device)
463
+
464
+ def forward(self, p, out):
465
+ ASFF = False # https://arxiv.org/abs/1911.09516
466
+ if ASFF:
467
+ i, n = self.index, self.nl # index in layers, number of layers
468
+ p = out[self.layers[i]]
469
+ bs, _, ny, nx = p.shape # bs, 255, 13, 13
470
+ if (self.nx, self.ny) != (nx, ny):
471
+ self.create_grids((nx, ny), p.device)
472
+
473
+ # outputs and weights
474
+ # w = F.softmax(p[:, -n:], 1) # normalized weights
475
+ w = torch.sigmoid(p[:, -n:]) * (2 / n) # sigmoid weights (faster)
476
+ # w = w / w.sum(1).unsqueeze(1) # normalize across layer dimension
477
+
478
+ # weighted ASFF sum
479
+ p = out[self.layers[i]][:, :-n] * w[:, i:i + 1]
480
+ for j in range(n):
481
+ if j != i:
482
+ p += w[:, j:j + 1] * \
483
+ F.interpolate(out[self.layers[j]][:, :-n], size=[ny, nx], mode='bilinear', align_corners=False)
484
+
485
+ elif ONNX_EXPORT:
486
+ bs = 1 # batch size
487
+ else:
488
+ bs, _, ny, nx = p.shape # bs, 255, 13, 13
489
+ if (self.nx, self.ny) != (nx, ny):
490
+ self.create_grids((nx, ny), p.device)
491
+
492
+ # p.view(bs, 255, 13, 13) -> (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh)
493
+ p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction
494
+
495
+ if self.training:
496
+ return p
497
+
498
+ elif ONNX_EXPORT:
499
+ # Avoid broadcasting for ANE operations
500
+ m = self.na * self.nx * self.ny
501
+ ng = 1. / self.ng.repeat(m, 1)
502
+ grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2)
503
+ anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng
504
+
505
+ p = p.view(m, self.no)
506
+ xy = torch.sigmoid(p[:, 0:2]) + grid # x, y
507
+ wh = torch.exp(p[:, 2:4]) * anchor_wh # width, height
508
+ p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \
509
+ torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5]) # conf
510
+ return p_cls, xy * ng, wh
511
+
512
+ else: # inference
513
+ #io = p.sigmoid()
514
+ #io[..., :2] = (io[..., :2] * 2. - 0.5 + self.grid)
515
+ #io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * self.anchor_wh
516
+ #io[..., :4] *= self.stride
517
+ io = p.clone() # inference output
518
+ io[..., :2] = torch.sigmoid(io[..., :2]) * 2. - 0.5 + self.grid # xy
519
+ io[..., 2:4] = (torch.sigmoid(io[..., 2:4]) * 2) ** 2 * self.anchor_wh # wh yolo method
520
+ io[..., :4] *= self.stride
521
+ io[..., 4:] = F.softmax(io[..., 4:], dim=-1)  # normalize conf/cls scores over the last dim (explicit dim avoids the implicit-dim warning)
522
+ return io.view(bs, -1, self.no), p # view [1, 3, 13, 13, 85] as [1, 507, 85]
523
+
524
+ class Darknet(nn.Module):
525
+ # YOLOv3 object detection model
526
+
527
+ def __init__(self, cfg, img_size=(416, 416), verbose=False):
528
+ super(Darknet, self).__init__()
529
+
530
+ self.module_defs = parse_model_cfg(cfg)
531
+ self.module_list, self.routs = create_modules(self.module_defs, img_size, cfg)
532
+ self.yolo_layers = get_yolo_layers(self)
533
+ # torch_utils.initialize_weights(self)
534
+
535
+ # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
536
+ self.version = np.array([0, 2, 5], dtype=np.int32) # (int32) version info: major, minor, revision
537
+ self.seen = np.array([0], dtype=np.int64) # (int64) number of images seen during training
538
+ self.info(verbose) if not ONNX_EXPORT else None # print model description
539
+
540
+ def forward(self, x, augment=False, verbose=False):
541
+
542
+ if not augment:
543
+ return self.forward_once(x)
544
+ else: # Augment images (inference and test only) https://github.com/ultralytics/yolov3/issues/931
545
+ img_size = x.shape[-2:] # height, width
546
+ s = [0.83, 0.67] # scales
547
+ y = []
548
+ for i, xi in enumerate((x,
549
+ torch_utils.scale_img(x.flip(3), s[0], same_shape=False), # flip-lr and scale
550
+ torch_utils.scale_img(x, s[1], same_shape=False), # scale
551
+ )):
552
+ # cv2.imwrite('img%g.jpg' % i, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1])
553
+ y.append(self.forward_once(xi)[0])
554
+
555
+ y[1][..., :4] /= s[0] # scale
556
+ y[1][..., 0] = img_size[1] - y[1][..., 0] # flip lr
557
+ y[2][..., :4] /= s[1] # scale
558
+
559
+ # for i, yi in enumerate(y): # coco small, medium, large = < 32**2 < 96**2 <
560
+ # area = yi[..., 2:4].prod(2)[:, :, None]
561
+ # if i == 1:
562
+ # yi *= (area < 96. ** 2).float()
563
+ # elif i == 2:
564
+ # yi *= (area > 32. ** 2).float()
565
+ # y[i] = yi
566
+
567
+ y = torch.cat(y, 1)
568
+ return y, None
569
+
570
+ def forward_once(self, x, augment=False, verbose=False):
571
+ img_size = x.shape[-2:] # height, width
572
+ yolo_out, out = [], []
573
+ if verbose:
574
+ print('0', x.shape)
575
+ str = ''
576
+
577
+ # Augment images (inference and test only)
578
+ if augment: # https://github.com/ultralytics/yolov3/issues/931
579
+ nb = x.shape[0] # batch size
580
+ s = [0.83, 0.67] # scales
581
+ x = torch.cat((x,
582
+ torch_utils.scale_img(x.flip(3), s[0]), # flip-lr and scale
583
+ torch_utils.scale_img(x, s[1]), # scale
584
+ ), 0)
585
+
586
+ for i, module in enumerate(self.module_list):
587
+ name = module.__class__.__name__
588
+ #print(name)
589
+ if name in ['WeightedFeatureFusion', 'FeatureConcat', 'FeatureConcat2', 'FeatureConcat3', 'FeatureConcat_l', 'ScaleChannel', 'ShiftChannel', 'ShiftChannel2D', 'ControlChannel', 'ControlChannel2D', 'AlternateChannel', 'AlternateChannel2D', 'SelectChannel', 'SelectChannel2D', 'ScaleSpatial']: # sum, concat
590
+ if verbose:
591
+ l = [i - 1] + module.layers # layers
592
+ sh = [list(x.shape)] + [list(out[i].shape) for i in module.layers] # shapes
593
+ str = ' >> ' + ' + '.join(['layer %g %s' % x for x in zip(l, sh)])
594
+ x = module(x, out) # WeightedFeatureFusion(), FeatureConcat()
595
+ elif name in ['ImplicitA', 'ImplicitM', 'ImplicitC', 'Implicit2DA', 'Implicit2DM', 'Implicit2DC']:
596
+ x = module()
597
+ elif name == 'YOLOLayer':
598
+ yolo_out.append(module(x, out))
599
+ elif name == 'JDELayer':
600
+ yolo_out.append(module(x, out))
601
+ else: # run module directly, i.e. mtype = 'convolutional', 'upsample', 'maxpool', 'batchnorm2d' etc.
602
+ #print(module)
603
+ #print(x.shape)
604
+ x = module(x)
605
+
606
+ out.append(x if self.routs[i] else [])
607
+ if verbose:
608
+ print('%g/%g %s -' % (i, len(self.module_list), name), list(x.shape), str)
609
+ str = ''
610
+
611
+ if self.training: # train
612
+ return yolo_out
613
+ elif ONNX_EXPORT: # export
614
+ x = [torch.cat(x, 0) for x in zip(*yolo_out)]
615
+ return x[0], torch.cat(x[1:3], 1) # scores, boxes: 3780x80, 3780x4
616
+ else: # inference or test
617
+ x, p = zip(*yolo_out) # inference output, training output
618
+ x = torch.cat(x, 1) # cat yolo outputs
619
+ if augment: # de-augment results
620
+ x = torch.split(x, nb, dim=0)
621
+ x[1][..., :4] /= s[0] # scale
622
+ x[1][..., 0] = img_size[1] - x[1][..., 0] # flip lr
623
+ x[2][..., :4] /= s[1] # scale
624
+ x = torch.cat(x, 1)
625
+ return x, p
626
+
627
+ def fuse(self):
628
+ # Fuse Conv2d + BatchNorm2d layers throughout model
629
+ print('Fusing layers...')
630
+ fused_list = nn.ModuleList()
631
+ for a in list(self.children())[0]:
632
+ if isinstance(a, nn.Sequential):
633
+ for i, b in enumerate(a):
634
+ if isinstance(b, nn.modules.batchnorm.BatchNorm2d):
635
+ # fuse this bn layer with the previous conv2d layer
636
+ conv = a[i - 1]
637
+ fused = torch_utils.fuse_conv_and_bn(conv, b)
638
+ a = nn.Sequential(fused, *list(a.children())[i + 1:])
639
+ break
640
+ fused_list.append(a)
641
+ self.module_list = fused_list
642
+ self.info() if not ONNX_EXPORT else None # yolov3-spp reduced from 225 to 152 layers
643
+
644
+ def info(self, verbose=False):
645
+ torch_utils.model_info(self, verbose)
646
+
647
+
648
+ def get_yolo_layers(model):
649
+ return [i for i, m in enumerate(model.module_list) if m.__class__.__name__ in ['YOLOLayer', 'JDELayer']] # [89, 101, 113]
650
+
651
+
652
+ def load_darknet_weights(self, weights, cutoff=-1):
653
+ # Parses and loads the weights stored in 'weights'
654
+
655
+ # Establish cutoffs (load layers between 0 and cutoff. if cutoff = -1 all are loaded)
656
+ file = Path(weights).name
657
+ if file == 'darknet53.conv.74':
658
+ cutoff = 75
659
+ elif file == 'yolov3-tiny.conv.15':
660
+ cutoff = 15
661
+
662
+ # Read weights file
663
+ with open(weights, 'rb') as f:
664
+ # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
665
+ self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision
666
+ self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training
667
+
668
+ weights = np.fromfile(f, dtype=np.float32) # the rest are weights
669
+
670
+ ptr = 0
671
+ for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
672
+ if mdef['type'] == 'convolutional':
673
+ conv = module[0]
674
+ if mdef['batch_normalize']:
675
+ # Load BN bias, weights, running mean and running variance
676
+ bn = module[1]
677
+ nb = bn.bias.numel() # number of biases
678
+ # Bias
679
+ bn.bias.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.bias))
680
+ ptr += nb
681
+ # Weight
682
+ bn.weight.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.weight))
683
+ ptr += nb
684
+ # Running Mean
685
+ bn.running_mean.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.running_mean))
686
+ ptr += nb
687
+ # Running Var
688
+ bn.running_var.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.running_var))
689
+ ptr += nb
690
+ else:
691
+ # Load conv. bias
692
+ nb = conv.bias.numel()
693
+ conv_b = torch.from_numpy(weights[ptr:ptr + nb]).view_as(conv.bias)
694
+ conv.bias.data.copy_(conv_b)
695
+ ptr += nb
696
+ # Load conv. weights
697
+ nw = conv.weight.numel() # number of weights
698
+ conv.weight.data.copy_(torch.from_numpy(weights[ptr:ptr + nw]).view_as(conv.weight))
699
+ ptr += nw
700
+
701
+
702
+ def save_weights(self, path='model.weights', cutoff=-1):
703
+ # Converts a PyTorch model to Darknet format (*.pt to *.weights)
704
+ # Note: Does not work if model.fuse() is applied
705
+ with open(path, 'wb') as f:
706
+ # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
707
+ self.version.tofile(f) # (int32) version info: major, minor, revision
708
+ self.seen.tofile(f) # (int64) number of images seen during training
709
+
710
+ # Iterate through layers
711
+ for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
712
+ if mdef['type'] == 'convolutional':
713
+ conv_layer = module[0]
714
+ # If batch norm, write bn parameters first
715
+ if mdef['batch_normalize']:
716
+ bn_layer = module[1]
717
+ bn_layer.bias.data.cpu().numpy().tofile(f)
718
+ bn_layer.weight.data.cpu().numpy().tofile(f)
719
+ bn_layer.running_mean.data.cpu().numpy().tofile(f)
720
+ bn_layer.running_var.data.cpu().numpy().tofile(f)
721
+ # Otherwise write conv bias
722
+ else:
723
+ conv_layer.bias.data.cpu().numpy().tofile(f)
724
+ # Write conv weights
725
+ conv_layer.weight.data.cpu().numpy().tofile(f)
726
+
727
+
728
+ def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights', saveto='converted.weights'):
729
+ # Converts a PyTorch *.pt checkpoint to Darknet *.weights format
730
+ # from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights')
731
+
732
+ # Initialize model
733
+ model = Darknet(cfg)
734
+ ckpt = torch.load(weights) # load checkpoint
735
+ try:
736
+ ckpt['model'] = {k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
737
+ model.load_state_dict(ckpt['model'], strict=False)
738
+ save_weights(model, path=saveto, cutoff=-1)
739
+ except KeyError as e:
740
+ print(e)
741
+
742
+ def attempt_download(weights):
743
+ # Attempt to download pretrained weights if not found locally
744
+ weights = weights.strip()
745
+ msg = weights + ' missing, try downloading from https://drive.google.com/open?id=1LezFG5g3BCW6iYaV89B2i64cqEUZD7e0'
746
+
747
+ if len(weights) > 0 and not os.path.isfile(weights):
748
+ d = {}  # filename -> Google Drive id map (none listed here); dict instead of the original {''} set so d[file] below is valid
749
+
750
+ file = Path(weights).name
751
+ if file in d:
752
+ r = gdrive_download(id=d[file], name=weights)
753
+ else: # download from pjreddie.com
754
+ url = 'https://pjreddie.com/media/files/' + file
755
+ print('Downloading ' + url)
756
+ r = os.system('curl -f ' + url + ' -o ' + weights)
757
+
758
+ # Error check
759
+ if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB
760
+ os.system('rm ' + weights) # remove partial downloads
761
+ raise Exception(msg)
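
The models.py hunk above is the cfg-driven Darknet builder plus the YOLOLayer/JDELayer heads used by the YOLOR detector. A minimal inference sketch, assuming the import path mirrors the file locations added in this commit and a hypothetical yolor_csp.pt checkpoint storing a {'model': state_dict} dict (the same layout convert() above expects):

import torch
from asone.detectors.yolor.models.models import Darknet

cfg = 'asone/detectors/yolor/cfg/yolor_csp.cfg'   # cfg file shipped in this commit
weights = 'yolor_csp.pt'                          # hypothetical checkpoint path
model = Darknet(cfg, img_size=(640, 640))
ckpt = torch.load(weights, map_location='cpu')    # expects {'model': state_dict}, as in convert()
model.load_state_dict(ckpt['model'], strict=False)
model.eval()
with torch.no_grad():
    pred, _ = model(torch.zeros(1, 3, 640, 640))  # raw predictions of shape (1, n_boxes, 5 + classes), ready for NMS

A Darknet-format *.weights file can instead be loaded with load_darknet_weights(model, path).
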
asone/detectors/yolor/utils/__init__.py ADDED
@@ -0,0 +1 @@
1
+
asone/detectors/yolor/utils/activations.py ADDED
@@ -0,0 +1,72 @@
1
+ # Activation functions
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ # Swish https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------
9
+ class Swish(nn.Module): #
10
+ @staticmethod
11
+ def forward(x):
12
+ return x * torch.sigmoid(x)
13
+
14
+
15
+ class Hardswish(nn.Module): # export-friendly version of nn.Hardswish()
16
+ @staticmethod
17
+ def forward(x):
18
+ # return x * F.hardsigmoid(x) # for torchscript and CoreML
19
+ return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX
20
+
21
+
22
+ class MemoryEfficientSwish(nn.Module):
23
+ class F(torch.autograd.Function):
24
+ @staticmethod
25
+ def forward(ctx, x):
26
+ ctx.save_for_backward(x)
27
+ return x * torch.sigmoid(x)
28
+
29
+ @staticmethod
30
+ def backward(ctx, grad_output):
31
+ x = ctx.saved_tensors[0]
32
+ sx = torch.sigmoid(x)
33
+ return grad_output * (sx * (1 + x * (1 - sx)))
34
+
35
+ def forward(self, x):
36
+ return self.F.apply(x)
37
+
38
+
39
+ # Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
40
+ class Mish(nn.Module):
41
+ @staticmethod
42
+ def forward(x):
43
+ return x * F.softplus(x).tanh()
44
+
45
+
46
+ class MemoryEfficientMish(nn.Module):
47
+ class F(torch.autograd.Function):
48
+ @staticmethod
49
+ def forward(ctx, x):
50
+ ctx.save_for_backward(x)
51
+ return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
52
+
53
+ @staticmethod
54
+ def backward(ctx, grad_output):
55
+ x = ctx.saved_tensors[0]
56
+ sx = torch.sigmoid(x)
57
+ fx = F.softplus(x).tanh()
58
+ return grad_output * (fx + x * sx * (1 - fx * fx))
59
+
60
+ def forward(self, x):
61
+ return self.F.apply(x)
62
+
63
+
64
+ # FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
65
+ class FReLU(nn.Module):
66
+ def __init__(self, c1, k=3): # ch_in, kernel
67
+ super().__init__()
68
+ self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1)
69
+ self.bn = nn.BatchNorm2d(c1)
70
+
71
+ def forward(self, x):
72
+ return torch.max(x, self.bn(self.conv(x)))
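
activations.py defines the drop-in nn.Module activations (Swish, Mish, their memory-efficient variants, Hardswish, FReLU) that create_modules() in models.py wires into the network. A quick standalone check, assuming the package path mirrors this commit's layout:

import torch
from asone.detectors.yolor.utils.activations import Mish, MemoryEfficientSwish

x = torch.randn(2, 8, requires_grad=True)
y = Mish()(x)                    # x * tanh(softplus(x))
z = MemoryEfficientSwish()(x)    # x * sigmoid(x), with a hand-written backward pass
(y.sum() + z.sum()).backward()   # gradients flow through both activations
print(x.grad.shape)              # torch.Size([2, 8])
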
asone/detectors/yolor/utils/autoanchor.py ADDED
@@ -0,0 +1,152 @@
1
+ # Auto-anchor utils
2
+
3
+ import numpy as np
4
+ import torch
5
+ import yaml
6
+ from scipy.cluster.vq import kmeans
7
+ from tqdm import tqdm
8
+
9
+
10
+ def check_anchor_order(m):
11
+ # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
12
+ a = m.anchor_grid.prod(-1).view(-1) # anchor area
13
+ da = a[-1] - a[0] # delta a
14
+ ds = m.stride[-1] - m.stride[0] # delta s
15
+ if da.sign() != ds.sign(): # anchor order and stride order disagree
16
+ print('Reversing anchor order')
17
+ m.anchors[:] = m.anchors.flip(0)
18
+ m.anchor_grid[:] = m.anchor_grid.flip(0)
19
+
20
+
21
+ def check_anchors(dataset, model, thr=4.0, imgsz=640):
22
+ # Check anchor fit to data, recompute if necessary
23
+ print('\nAnalyzing anchors... ', end='')
24
+ m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect()
25
+ shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
26
+ scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale
27
+ wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh
28
+
29
+ def metric(k): # compute metric
30
+ r = wh[:, None] / k[None]
31
+ x = torch.min(r, 1. / r).min(2)[0] # ratio metric
32
+ best = x.max(1)[0] # best_x
33
+ aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold
34
+ bpr = (best > 1. / thr).float().mean() # best possible recall
35
+ return bpr, aat
36
+
37
+ bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2))
38
+ print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='')
39
+ if bpr < 0.98: # threshold to recompute
40
+ print('. Attempting to improve anchors, please wait...')
41
+ na = m.anchor_grid.numel() // 2 # number of anchors
42
+ new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
43
+ new_bpr = metric(new_anchors.reshape(-1, 2))[0]
44
+ if new_bpr > bpr: # replace anchors
45
+ new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors)
46
+ m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference
47
+ m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss
48
+ check_anchor_order(m)
49
+ print('New anchors saved to model. Update model *.yaml to use these anchors in the future.')
50
+ else:
51
+ print('Original anchors better than new anchors. Proceeding with original anchors.')
52
+ print('') # newline
53
+
54
+
55
+ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
56
+ """ Creates kmeans-evolved anchors from training dataset
57
+
58
+ Arguments:
59
+ path: path to dataset *.yaml, or a loaded dataset
60
+ n: number of anchors
61
+ img_size: image size used for training
62
+ thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
63
+ gen: generations to evolve anchors using genetic algorithm
64
+ verbose: print all results
65
+
66
+ Return:
67
+ k: kmeans evolved anchors
68
+
69
+ Usage:
70
+ from utils.general import *; _ = kmean_anchors()
71
+ """
72
+ thr = 1. / thr
73
+
74
+ def metric(k, wh): # compute metrics
75
+ r = wh[:, None] / k[None]
76
+ x = torch.min(r, 1. / r).min(2)[0] # ratio metric
77
+ # x = wh_iou(wh, torch.tensor(k)) # iou metric
78
+ return x, x.max(1)[0] # x, best_x
79
+
80
+ def anchor_fitness(k): # mutation fitness
81
+ _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
82
+ return (best * (best > thr).float()).mean() # fitness
83
+
84
+ def print_results(k):
85
+ k = k[np.argsort(k.prod(1))] # sort small to large
86
+ x, best = metric(k, wh0)
87
+ bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr
88
+ print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat))
89
+ print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' %
90
+ (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='')
91
+ for i, x in enumerate(k):
92
+ print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg
93
+ return k
94
+
95
+ if isinstance(path, str): # *.yaml file
96
+ with open(path) as f:
97
+ data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
98
+ from asone.detectors.yolor.utils.datasets import LoadImagesAndLabels  # keep import consistent with this package layout
99
+ dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
100
+ else:
101
+ dataset = path # dataset
102
+
103
+ # Get label wh
104
+ shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
105
+ wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
106
+
107
+ # Filter
108
+ i = (wh0 < 3.0).any(1).sum()
109
+ if i:
110
+ print('WARNING: Extremely small objects found. '
111
+ '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0)))
112
+ wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels
113
+
114
+ # Kmeans calculation
115
+ print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))
116
+ s = wh.std(0) # sigmas for whitening
117
+ k, dist = kmeans(wh / s, n, iter=30) # points, mean distance
118
+ k *= s
119
+ wh = torch.tensor(wh, dtype=torch.float32) # filtered
120
+ wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered
121
+ k = print_results(k)
122
+
123
+ # Plot
124
+ # k, d = [None] * 20, [None] * 20
125
+ # for i in tqdm(range(1, 21)):
126
+ # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance
127
+ # fig, ax = plt.subplots(1, 2, figsize=(14, 7))
128
+ # ax = ax.ravel()
129
+ # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
130
+ # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh
131
+ # ax[0].hist(wh[wh[:, 0]<100, 0],400)
132
+ # ax[1].hist(wh[wh[:, 1]<100, 1],400)
133
+ # fig.tight_layout()
134
+ # fig.savefig('wh.png', dpi=200)
135
+
136
+ # Evolve
137
+ npr = np.random
138
+ f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma
139
+ pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar
140
+ for _ in pbar:
141
+ v = np.ones(sh)
142
+ while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
143
+ v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
144
+ kg = (k.copy() * v).clip(min=2.0)
145
+ fg = anchor_fitness(kg)
146
+ if fg > f:
147
+ f, k = fg, kg.copy()
148
+ pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f
149
+ if verbose:
150
+ print_results(k)
151
+
152
+ return print_results(k)
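
check_anchors() and kmean_anchors() above score anchors with the same width/height ratio metric. A self-contained illustration of that metric on made-up label and anchor sizes, using the default thr=4.0:

import torch

wh = torch.tensor([[30., 60.], [120., 90.], [300., 250.]])  # label widths/heights in pixels (made up)
k = torch.tensor([[10., 13.], [62., 45.], [156., 198.]])    # candidate anchors (made up)
r = wh[:, None] / k[None]                                   # pairwise wh ratios, shape (labels, anchors, 2)
x = torch.min(r, 1. / r).min(2)[0]                          # ratio metric, as in metric() above
best = x.max(1)[0]                                          # best-matching anchor per label
bpr = (best > 1. / 4.0).float().mean()                      # best possible recall at thr=4.0
print('BPR = %.2f' % bpr)
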
asone/detectors/yolor/utils/datasets.py ADDED
@@ -0,0 +1,1297 @@
1
+ # Dataset utils and dataloaders
2
+
3
+ import glob
4
+ import math
5
+ import os
6
+ import random
7
+ import shutil
8
+ import time
9
+ from itertools import repeat
10
+ from multiprocessing.pool import ThreadPool
11
+ from pathlib import Path
12
+ from threading import Thread
13
+
14
+ import cv2
15
+ import numpy as np
16
+ import torch
17
+ from PIL import Image, ExifTags
18
+ from torch.utils.data import Dataset
19
+ from tqdm import tqdm
20
+
21
+ import pickle
22
+ from copy import deepcopy
23
+ from pycocotools import mask as maskUtils
24
+ from torchvision.utils import save_image
25
+
26
+ from asone.detectors.yolor.utils.general import xyxy2xywh, xywh2xyxy
27
+ from asone.detectors.yolor.utils.torch_utils import torch_distributed_zero_first
28
+
29
+ # Parameters
30
+ help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
31
+ img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixes
32
+ vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes
33
+
34
+ # Get orientation exif tag
35
+ for orientation in ExifTags.TAGS.keys():
36
+ if ExifTags.TAGS[orientation] == 'Orientation':
37
+ break
38
+
39
+
40
+ def get_hash(files):
41
+ # Returns a single hash value of a list of files
42
+ return sum(os.path.getsize(f) for f in files if os.path.isfile(f))
43
+
44
+
45
+ def exif_size(img):
46
+ # Returns exif-corrected PIL size
47
+ s = img.size # (width, height)
48
+ try:
49
+ rotation = dict(img._getexif().items())[orientation]
50
+ if rotation == 6: # rotation 270
51
+ s = (s[1], s[0])
52
+ elif rotation == 8: # rotation 90
53
+ s = (s[1], s[0])
54
+ except:
55
+ pass
56
+
57
+ return s
58
+
59
+
60
+ def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
61
+ rank=-1, world_size=1, workers=8):
62
+ # Make sure only the first process in DDP processes the dataset, so the others can use the cache
63
+ with torch_distributed_zero_first(rank):
64
+ dataset = LoadImagesAndLabels(path, imgsz, batch_size,
65
+ augment=augment, # augment images
66
+ hyp=hyp, # augmentation hyperparameters
67
+ rect=rect, # rectangular training
68
+ cache_images=cache,
69
+ single_cls=opt.single_cls,
70
+ stride=int(stride),
71
+ pad=pad,
72
+ rank=rank)
73
+
74
+ batch_size = min(batch_size, len(dataset))
75
+ nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers
76
+ sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
77
+ dataloader = InfiniteDataLoader(dataset,
78
+ batch_size=batch_size,
79
+ num_workers=nw,
80
+ sampler=sampler,
81
+ pin_memory=True,
82
+ collate_fn=LoadImagesAndLabels.collate_fn) # torch.utils.data.DataLoader()
83
+ return dataloader, dataset
84
+
85
+
86
+ def create_dataloader9(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
87
+ rank=-1, world_size=1, workers=8):
88
+ # Make sure only the first process in DDP processes the dataset, so the others can use the cache
89
+ with torch_distributed_zero_first(rank):
90
+ dataset = LoadImagesAndLabels9(path, imgsz, batch_size,
91
+ augment=augment, # augment images
92
+ hyp=hyp, # augmentation hyperparameters
93
+ rect=rect, # rectangular training
94
+ cache_images=cache,
95
+ single_cls=opt.single_cls,
96
+ stride=int(stride),
97
+ pad=pad,
98
+ rank=rank)
99
+
100
+ batch_size = min(batch_size, len(dataset))
101
+ nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers
102
+ sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
103
+ dataloader = InfiniteDataLoader(dataset,
104
+ batch_size=batch_size,
105
+ num_workers=nw,
106
+ sampler=sampler,
107
+ pin_memory=True,
108
+ collate_fn=LoadImagesAndLabels9.collate_fn) # torch.utils.data.DataLoader()
109
+ return dataloader, dataset
110
+
111
+
112
+ class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
113
+ """ Dataloader that reuses workers
114
+
115
+ Uses same syntax as vanilla DataLoader
116
+ """
117
+
118
+ def __init__(self, *args, **kwargs):
119
+ super().__init__(*args, **kwargs)
120
+ object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
121
+ self.iterator = super().__iter__()
122
+
123
+ def __len__(self):
124
+ return len(self.batch_sampler.sampler)
125
+
126
+ def __iter__(self):
127
+ for i in range(len(self)):
128
+ yield next(self.iterator)
129
+
130
+
131
+ class _RepeatSampler(object):
132
+ """ Sampler that repeats forever
133
+
134
+ Args:
135
+ sampler (Sampler)
136
+ """
137
+
138
+ def __init__(self, sampler):
139
+ self.sampler = sampler
140
+
141
+ def __iter__(self):
142
+ while True:
143
+ yield from iter(self.sampler)
144
+
145
+
146
+ class LoadImages: # for inference
147
+ def __init__(self, path, img_size=640, auto_size=32):
148
+ p = str(Path(path)) # os-agnostic
149
+ p = os.path.abspath(p) # absolute path
150
+ if '*' in p:
151
+ files = sorted(glob.glob(p, recursive=True)) # glob
152
+ elif os.path.isdir(p):
153
+ files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
154
+ elif os.path.isfile(p):
155
+ files = [p] # files
156
+ else:
157
+ raise Exception('ERROR: %s does not exist' % p)
158
+
159
+ images = [x for x in files if x.split('.')[-1].lower() in img_formats]
160
+ videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
161
+ ni, nv = len(images), len(videos)
162
+
163
+ self.img_size = img_size
164
+ self.auto_size = auto_size
165
+ self.files = images + videos
166
+ self.nf = ni + nv # number of files
167
+ self.video_flag = [False] * ni + [True] * nv
168
+ self.mode = 'images'
169
+ if any(videos):
170
+ self.new_video(videos[0]) # new video
171
+ else:
172
+ self.cap = None
173
+ assert self.nf > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
174
+ (p, img_formats, vid_formats)
175
+
176
+ def __iter__(self):
177
+ self.count = 0
178
+ return self
179
+
180
+ def __next__(self):
181
+ if self.count == self.nf:
182
+ raise StopIteration
183
+ path = self.files[self.count]
184
+
185
+ if self.video_flag[self.count]:
186
+ # Read video
187
+ self.mode = 'video'
188
+ ret_val, img0 = self.cap.read()
189
+ if not ret_val:
190
+ self.count += 1
191
+ self.cap.release()
192
+ if self.count == self.nf: # last video
193
+ raise StopIteration
194
+ else:
195
+ path = self.files[self.count]
196
+ self.new_video(path)
197
+ ret_val, img0 = self.cap.read()
198
+
199
+ self.frame += 1
200
+ print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='')
201
+
202
+ else:
203
+ # Read image
204
+ self.count += 1
205
+ img0 = cv2.imread(path) # BGR
206
+ assert img0 is not None, 'Image Not Found ' + path
207
+ print('image %g/%g %s: ' % (self.count, self.nf, path), end='')
208
+
209
+ # Padded resize
210
+ img = letterbox(img0, new_shape=self.img_size, auto_size=self.auto_size)[0]
211
+
212
+ # Convert
213
+ img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
214
+ img = np.ascontiguousarray(img)
215
+
216
+ return path, img, img0, self.cap
217
+
218
+ def new_video(self, path):
219
+ self.frame = 0
220
+ self.cap = cv2.VideoCapture(path)
221
+ self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
222
+
223
+ def __len__(self):
224
+ return self.nf # number of files
225
+
226
+
227
+ class LoadWebcam: # for inference
228
+ def __init__(self, pipe='0', img_size=640):
229
+ self.img_size = img_size
230
+
231
+ if pipe.isnumeric():
232
+ pipe = eval(pipe) # local camera
233
+ # pipe = 'rtsp://192.168.1.64/1' # IP camera
234
+ # pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login
235
+ # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera
236
+
237
+ self.pipe = pipe
238
+ self.cap = cv2.VideoCapture(pipe) # video capture object
239
+ self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size
240
+
241
+ def __iter__(self):
242
+ self.count = -1
243
+ return self
244
+
245
+ def __next__(self):
246
+ self.count += 1
247
+ if cv2.waitKey(1) == ord('q'): # q to quit
248
+ self.cap.release()
249
+ cv2.destroyAllWindows()
250
+ raise StopIteration
251
+
252
+ # Read frame
253
+ if self.pipe == 0: # local camera
254
+ ret_val, img0 = self.cap.read()
255
+ img0 = cv2.flip(img0, 1) # flip left-right
256
+ else: # IP camera
257
+ n = 0
258
+ while True:
259
+ n += 1
260
+ self.cap.grab()
261
+ if n % 30 == 0: # skip frames
262
+ ret_val, img0 = self.cap.retrieve()
263
+ if ret_val:
264
+ break
265
+
266
+ # Print
267
+ assert ret_val, 'Camera Error %s' % self.pipe
268
+ img_path = 'webcam.jpg'
269
+ print('webcam %g: ' % self.count, end='')
270
+
271
+ # Padded resize
272
+ img = letterbox(img0, new_shape=self.img_size)[0]
273
+
274
+ # Convert
275
+ img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
276
+ img = np.ascontiguousarray(img)
277
+
278
+ return img_path, img, img0, None
279
+
280
+ def __len__(self):
281
+ return 0
282
+
283
+
284
+ class LoadStreams: # multiple IP or RTSP cameras
285
+ def __init__(self, sources='streams.txt', img_size=640):
286
+ self.mode = 'images'
287
+ self.img_size = img_size
288
+
289
+ if os.path.isfile(sources):
290
+ with open(sources, 'r') as f:
291
+ sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
292
+ else:
293
+ sources = [sources]
294
+
295
+ n = len(sources)
296
+ self.imgs = [None] * n
297
+ self.sources = sources
298
+ for i, s in enumerate(sources):
299
+ # Start the thread to read frames from the video stream
300
+ print('%g/%g: %s... ' % (i + 1, n, s), end='')
301
+ cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s)
302
+ assert cap.isOpened(), 'Failed to open %s' % s
303
+ w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
304
+ h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
305
+ fps = cap.get(cv2.CAP_PROP_FPS) % 100
306
+ _, self.imgs[i] = cap.read() # guarantee first frame
307
+ thread = Thread(target=self.update, args=([i, cap]), daemon=True)
308
+ print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
309
+ thread.start()
310
+ print('') # newline
311
+
312
+ # check for common shapes
313
+ s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
314
+ self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
315
+ if not self.rect:
316
+ print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
317
+
318
+ def update(self, index, cap):
319
+ # Read next stream frame in a daemon thread
320
+ n = 0
321
+ while cap.isOpened():
322
+ n += 1
323
+ # _, self.imgs[index] = cap.read()
324
+ cap.grab()
325
+ if n == 4: # read every 4th frame
326
+ _, self.imgs[index] = cap.retrieve()
327
+ n = 0
328
+ time.sleep(0.01) # wait time
329
+
330
+ def __iter__(self):
331
+ self.count = -1
332
+ return self
333
+
334
+ def __next__(self):
335
+ self.count += 1
336
+ img0 = self.imgs.copy()
337
+ if cv2.waitKey(1) == ord('q'): # q to quit
338
+ cv2.destroyAllWindows()
339
+ raise StopIteration
340
+
341
+ # Letterbox
342
+ img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
343
+
344
+ # Stack
345
+ img = np.stack(img, 0)
346
+
347
+ # Convert
348
+ img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
349
+ img = np.ascontiguousarray(img)
350
+
351
+ return self.sources, img, img0, None
352
+
353
+ def __len__(self):
354
+ return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
355
+
356
+
357
+ class LoadImagesAndLabels(Dataset): # for training/testing
358
+ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
359
+ cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1):
360
+ self.img_size = img_size
361
+ self.augment = augment
362
+ self.hyp = hyp
363
+ self.image_weights = image_weights
364
+ self.rect = False if image_weights else rect
365
+ self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
366
+ self.mosaic_border = [-img_size // 2, -img_size // 2]
367
+ self.stride = stride
368
+
369
+ def img2label_paths(img_paths):
370
+ # Define label paths as a function of image paths
371
+ sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
372
+ return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths]
373
+
374
+ try:
375
+ f = [] # image files
376
+ for p in path if isinstance(path, list) else [path]:
377
+ p = Path(p) # os-agnostic
378
+ if p.is_dir(): # dir
379
+ f += glob.glob(str(p / '**' / '*.*'), recursive=True)
380
+ elif p.is_file(): # file
381
+ with open(p, 'r') as t:
382
+ t = t.read().splitlines()
383
+ parent = str(p.parent) + os.sep
384
+ f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
385
+ else:
386
+ raise Exception('%s does not exist' % p)
387
+ self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
388
+ assert self.img_files, 'No images found'
389
+ except Exception as e:
390
+ raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
391
+
392
+ # Check cache
393
+ self.label_files = img2label_paths(self.img_files) # labels
394
+ cache_path = str(Path(self.label_files[0]).parent) + '.cache3' # cached labels
395
+ if os.path.isfile(cache_path):
396
+ cache = torch.load(cache_path) # load
397
+ if cache['hash'] != get_hash(self.label_files + self.img_files): # dataset changed
398
+ cache = self.cache_labels(cache_path) # re-cache
399
+ else:
400
+ cache = self.cache_labels(cache_path) # cache
401
+
402
+ # Read cache
403
+ cache.pop('hash') # remove hash
404
+ labels, shapes = zip(*cache.values())
405
+ self.labels = list(labels)
406
+ self.shapes = np.array(shapes, dtype=np.float64)
407
+ self.img_files = list(cache.keys()) # update
408
+ self.label_files = img2label_paths(cache.keys()) # update
409
+
410
+ n = len(shapes) # number of images
411
+ bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index (np.int alias removed in recent NumPy)
412
+ nb = bi[-1] + 1 # number of batches
413
+ self.batch = bi # batch index of image
414
+ self.n = n
415
+
416
+ # Rectangular Training
417
+ if self.rect:
418
+ # Sort by aspect ratio
419
+ s = self.shapes # wh
420
+ ar = s[:, 1] / s[:, 0] # aspect ratio
421
+ irect = ar.argsort()
422
+ self.img_files = [self.img_files[i] for i in irect]
423
+ self.label_files = [self.label_files[i] for i in irect]
424
+ self.labels = [self.labels[i] for i in irect]
425
+ self.shapes = s[irect] # wh
426
+ ar = ar[irect]
427
+
428
+ # Set training image shapes
429
+ shapes = [[1, 1]] * nb
430
+ for i in range(nb):
431
+ ari = ar[bi == i]
432
+ mini, maxi = ari.min(), ari.max()
433
+ if maxi < 1:
434
+ shapes[i] = [maxi, 1]
435
+ elif mini > 1:
436
+ shapes[i] = [1, 1 / mini]
437
+
438
+ self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
439
+
440
+ # Check labels
441
+ create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
442
+ nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate
443
+ pbar = enumerate(self.label_files)
444
+ if rank in [-1, 0]:
445
+ pbar = tqdm(pbar)
446
+ for i, file in pbar:
447
+ l = self.labels[i] # label
448
+ if l is not None and l.shape[0]:
449
+ assert l.shape[1] == 5, '> 5 label columns: %s' % file
450
+ assert (l >= 0).all(), 'negative labels: %s' % file
451
+ assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
452
+ if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows
453
+ nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows
454
+ if single_cls:
455
+ l[:, 0] = 0 # force dataset into single-class mode
456
+ self.labels[i] = l
457
+ nf += 1 # file found
458
+
459
+ # Create subdataset (a smaller dataset)
460
+ if create_datasubset and ns < 1E4:
461
+ if ns == 0:
462
+ create_folder(path='./datasubset')
463
+ os.makedirs('./datasubset/images')
464
+ exclude_classes = 43
465
+ if exclude_classes not in l[:, 0]:
466
+ ns += 1
467
+ # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image
468
+ with open('./datasubset/images.txt', 'a') as f:
469
+ f.write(self.img_files[i] + '\n')
470
+
471
+ # Extract object detection boxes for a second stage classifier
472
+ if extract_bounding_boxes:
473
+ p = Path(self.img_files[i])
474
+ img = cv2.imread(str(p))
475
+ h, w = img.shape[:2]
476
+ for j, x in enumerate(l):
477
+ f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
478
+ if not os.path.exists(Path(f).parent):
479
+ os.makedirs(Path(f).parent) # make new output folder
480
+
481
+ b = x[1:] * [w, h, w, h] # box
482
+ b[2:] = b[2:].max() # rectangle to square
483
+ b[2:] = b[2:] * 1.3 + 30 # pad
484
+ b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)
485
+
486
+ b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
487
+ b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
488
+ assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
489
+ else:
490
+ ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty
491
+ # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove
492
+
493
+ if rank in [-1, 0]:
494
+ pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
495
+ cache_path, nf, nm, ne, nd, n)
496
+ if nf == 0:
497
+ s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
498
+ print(s)
499
+ assert not augment, '%s. Can not train without labels.' % s
500
+
501
+ # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
502
+ self.imgs = [None] * n
503
+ if cache_images:
504
+ gb = 0 # Gigabytes of cached images
505
+ self.img_hw0, self.img_hw = [None] * n, [None] * n
506
+ results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads
507
+ pbar = tqdm(enumerate(results), total=n)
508
+ for i, x in pbar:
509
+ self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
510
+ gb += self.imgs[i].nbytes
511
+ pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
512
+
513
+ def cache_labels(self, path='labels.cache3'):
514
+ # Cache dataset labels, check images and read shapes
515
+ x = {} # dict
516
+ pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
517
+ for (img, label) in pbar:
518
+ try:
519
+ l = []
520
+ im = Image.open(img)
521
+ im.verify() # PIL verify
522
+ shape = exif_size(im) # image size
523
+ assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels'
524
+ if os.path.isfile(label):
525
+ with open(label, 'r') as f:
526
+ l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels
527
+ if len(l) == 0:
528
+ l = np.zeros((0, 5), dtype=np.float32)
529
+ x[img] = [l, shape]
530
+ except Exception as e:
531
+ print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e))
532
+
533
+ x['hash'] = get_hash(self.label_files + self.img_files)
534
+ torch.save(x, path) # save for next time
535
+ return x
536
+
537
+ def __len__(self):
538
+ return len(self.img_files)
539
+
540
+ # def __iter__(self):
541
+ # self.count = -1
542
+ # print('ran dataset iter')
543
+ # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
544
+ # return self
545
+
546
+ def __getitem__(self, index):
547
+ if self.image_weights:
548
+ index = self.indices[index]
549
+
550
+ hyp = self.hyp
551
+ mosaic = self.mosaic and random.random() < hyp['mosaic']
552
+ if mosaic:
553
+ # Load mosaic
554
+ img, labels = load_mosaic(self, index)
555
+ #img, labels = load_mosaic9(self, index)
556
+ shapes = None
557
+
558
+ # MixUp https://arxiv.org/pdf/1710.09412.pdf
559
+ if random.random() < hyp['mixup']:
560
+ img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1))
561
+ #img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1))
562
+ r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0
563
+ img = (img * r + img2 * (1 - r)).astype(np.uint8)
564
+ labels = np.concatenate((labels, labels2), 0)
565
+
566
+ else:
567
+ # Load image
568
+ img, (h0, w0), (h, w) = load_image(self, index)
569
+
570
+ # Letterbox
571
+ shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
572
+ img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
573
+ shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
574
+
575
+ # Load labels
576
+ labels = []
577
+ x = self.labels[index]
578
+ if x.size > 0:
579
+ # Normalized xywh to pixel xyxy format
580
+ labels = x.copy()
581
+ labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
582
+ labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
583
+ labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
584
+ labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
585
+
586
+ if self.augment:
587
+ # Augment imagespace
588
+ if not mosaic:
589
+ img, labels = random_perspective(img, labels,
590
+ degrees=hyp['degrees'],
591
+ translate=hyp['translate'],
592
+ scale=hyp['scale'],
593
+ shear=hyp['shear'],
594
+ perspective=hyp['perspective'])
595
+
596
+ # Augment colorspace
597
+ augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
598
+
599
+ # Apply cutouts
600
+ # if random.random() < 0.9:
601
+ # labels = cutout(img, labels)
602
+
603
+ nL = len(labels) # number of labels
604
+ if nL:
605
+ labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh
606
+ labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1
607
+ labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1
608
+
609
+ if self.augment:
610
+ # flip up-down
611
+ if random.random() < hyp['flipud']:
612
+ img = np.flipud(img)
613
+ if nL:
614
+ labels[:, 2] = 1 - labels[:, 2]
615
+
616
+ # flip left-right
617
+ if random.random() < hyp['fliplr']:
618
+ img = np.fliplr(img)
619
+ if nL:
620
+ labels[:, 1] = 1 - labels[:, 1]
621
+
622
+ labels_out = torch.zeros((nL, 6))
623
+ if nL:
624
+ labels_out[:, 1:] = torch.from_numpy(labels)
625
+
626
+ # Convert
627
+ img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
628
+ img = np.ascontiguousarray(img)
629
+
630
+ return torch.from_numpy(img), labels_out, self.img_files[index], shapes
631
+
632
+ @staticmethod
633
+ def collate_fn(batch):
634
+ img, label, path, shapes = zip(*batch) # transposed
635
+ for i, l in enumerate(label):
636
+ l[:, 0] = i # add target image index for build_targets()
637
+ return torch.stack(img, 0), torch.cat(label, 0), path, shapes
638
+
639
+
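`LoadImagesAndLabels.__getitem__` returns a variable number of boxes per image, so batches must be assembled with the class's own `collate_fn`. A minimal sketch of plugging it into a PyTorch `DataLoader`; the hyperparameter values and the `data/train.txt` path below are illustrative assumptions, not values shipped with this repo:

```python
import torch
from torch.utils.data import DataLoader
from asone.detectors.yolor.utils.datasets import LoadImagesAndLabels

# Illustrative hyperparameters -- the real project reads these from a YAML file.
hyp = {'mosaic': 1.0, 'mixup': 0.0, 'degrees': 0.0, 'translate': 0.1, 'scale': 0.5,
       'shear': 0.0, 'perspective': 0.0, 'hsv_h': 0.015, 'hsv_s': 0.7, 'hsv_v': 0.4,
       'flipud': 0.0, 'fliplr': 0.5}

dataset = LoadImagesAndLabels('data/train.txt', img_size=640, batch_size=16,
                              augment=True, hyp=hyp)
loader = DataLoader(dataset, batch_size=16, shuffle=True,
                    collate_fn=LoadImagesAndLabels.collate_fn)

imgs, targets, paths, shapes = next(iter(loader))
# imgs:    (16, 3, 640, 640) uint8 tensor (cast to float and divide by 255 before the model)
# targets: (total_boxes, 6) rows of [image_index, class, x, y, w, h] with normalized xywh
```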
640
+ class LoadImagesAndLabels9(Dataset): # for training/testing
641
+ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
642
+ cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1):
643
+ self.img_size = img_size
644
+ self.augment = augment
645
+ self.hyp = hyp
646
+ self.image_weights = image_weights
647
+ self.rect = False if image_weights else rect
648
+ self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
649
+ self.mosaic_border = [-img_size // 2, -img_size // 2]
650
+ self.stride = stride
651
+
652
+ def img2label_paths(img_paths):
653
+ # Define label paths as a function of image paths
654
+ sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
655
+ return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths]
656
+
657
+ try:
658
+ f = [] # image files
659
+ for p in path if isinstance(path, list) else [path]:
660
+ p = Path(p) # os-agnostic
661
+ if p.is_dir(): # dir
662
+ f += glob.glob(str(p / '**' / '*.*'), recursive=True)
663
+ elif p.is_file(): # file
664
+ with open(p, 'r') as t:
665
+ t = t.read().splitlines()
666
+ parent = str(p.parent) + os.sep
667
+ f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
668
+ else:
669
+ raise Exception('%s does not exist' % p)
670
+ self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
671
+ assert self.img_files, 'No images found'
672
+ except Exception as e:
673
+ raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
674
+
675
+ # Check cache
676
+ self.label_files = img2label_paths(self.img_files) # labels
677
+ cache_path = str(Path(self.label_files[0]).parent) + '.cache3' # cached labels
678
+ if os.path.isfile(cache_path):
679
+ cache = torch.load(cache_path) # load
680
+ if cache['hash'] != get_hash(self.label_files + self.img_files): # dataset changed
681
+ cache = self.cache_labels(cache_path) # re-cache
682
+ else:
683
+ cache = self.cache_labels(cache_path) # cache
684
+
685
+ # Read cache
686
+ cache.pop('hash') # remove hash
687
+ labels, shapes = zip(*cache.values())
688
+ self.labels = list(labels)
689
+ self.shapes = np.array(shapes, dtype=np.float64)
690
+ self.img_files = list(cache.keys()) # update
691
+ self.label_files = img2label_paths(cache.keys()) # update
692
+
693
+ n = len(shapes) # number of images
694
+ bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
695
+ nb = bi[-1] + 1 # number of batches
696
+ self.batch = bi # batch index of image
697
+ self.n = n
698
+
699
+ # Rectangular Training
700
+ if self.rect:
701
+ # Sort by aspect ratio
702
+ s = self.shapes # wh
703
+ ar = s[:, 1] / s[:, 0] # aspect ratio
704
+ irect = ar.argsort()
705
+ self.img_files = [self.img_files[i] for i in irect]
706
+ self.label_files = [self.label_files[i] for i in irect]
707
+ self.labels = [self.labels[i] for i in irect]
708
+ self.shapes = s[irect] # wh
709
+ ar = ar[irect]
710
+
711
+ # Set training image shapes
712
+ shapes = [[1, 1]] * nb
713
+ for i in range(nb):
714
+ ari = ar[bi == i]
715
+ mini, maxi = ari.min(), ari.max()
716
+ if maxi < 1:
717
+ shapes[i] = [maxi, 1]
718
+ elif mini > 1:
719
+ shapes[i] = [1, 1 / mini]
720
+
721
+ self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
722
+
723
+ # Check labels
724
+ create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
725
+ nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate
726
+ pbar = enumerate(self.label_files)
727
+ if rank in [-1, 0]:
728
+ pbar = tqdm(pbar)
729
+ for i, file in pbar:
730
+ l = self.labels[i] # label
731
+ if l is not None and l.shape[0]:
732
+ assert l.shape[1] == 5, '> 5 label columns: %s' % file
733
+ assert (l >= 0).all(), 'negative labels: %s' % file
734
+ assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
735
+ if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows
736
+ nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows
737
+ if single_cls:
738
+ l[:, 0] = 0 # force dataset into single-class mode
739
+ self.labels[i] = l
740
+ nf += 1 # file found
741
+
742
+ # Create subdataset (a smaller dataset)
743
+ if create_datasubset and ns < 1E4:
744
+ if ns == 0:
745
+ create_folder(path='./datasubset')
746
+ os.makedirs('./datasubset/images')
747
+ exclude_classes = 43
748
+ if exclude_classes not in l[:, 0]:
749
+ ns += 1
750
+ # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image
751
+ with open('./datasubset/images.txt', 'a') as f:
752
+ f.write(self.img_files[i] + '\n')
753
+
754
+ # Extract object detection boxes for a second stage classifier
755
+ if extract_bounding_boxes:
756
+ p = Path(self.img_files[i])
757
+ img = cv2.imread(str(p))
758
+ h, w = img.shape[:2]
759
+ for j, x in enumerate(l):
760
+ f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
761
+ if not os.path.exists(Path(f).parent):
762
+ os.makedirs(Path(f).parent) # make new output folder
763
+
764
+ b = x[1:] * [w, h, w, h] # box
765
+ b[2:] = b[2:].max() # rectangle to square
766
+ b[2:] = b[2:] * 1.3 + 30 # pad
767
+ b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)
768
+
769
+ b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
770
+ b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
771
+ assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
772
+ else:
773
+ ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty
774
+ # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove
775
+
776
+ if rank in [-1, 0]:
777
+ pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
778
+ cache_path, nf, nm, ne, nd, n)
779
+ if nf == 0:
780
+ s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
781
+ print(s)
782
+ assert not augment, '%s. Can not train without labels.' % s
783
+
784
+ # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
785
+ self.imgs = [None] * n
786
+ if cache_images:
787
+ gb = 0 # Gigabytes of cached images
788
+ self.img_hw0, self.img_hw = [None] * n, [None] * n
789
+ results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads
790
+ pbar = tqdm(enumerate(results), total=n)
791
+ for i, x in pbar:
792
+ self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
793
+ gb += self.imgs[i].nbytes
794
+ pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
795
+
796
+ def cache_labels(self, path='labels.cache3'):
797
+ # Cache dataset labels, check images and read shapes
798
+ x = {} # dict
799
+ pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
800
+ for (img, label) in pbar:
801
+ try:
802
+ l = []
803
+ im = Image.open(img)
804
+ im.verify() # PIL verify
805
+ shape = exif_size(im) # image size
806
+ assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels'
807
+ if os.path.isfile(label):
808
+ with open(label, 'r') as f:
809
+ l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels
810
+ if len(l) == 0:
811
+ l = np.zeros((0, 5), dtype=np.float32)
812
+ x[img] = [l, shape]
813
+ except Exception as e:
814
+ print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e))
815
+
816
+ x['hash'] = get_hash(self.label_files + self.img_files)
817
+ torch.save(x, path) # save for next time
818
+ return x
819
+
820
+ def __len__(self):
821
+ return len(self.img_files)
822
+
823
+ # def __iter__(self):
824
+ # self.count = -1
825
+ # print('ran dataset iter')
826
+ # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
827
+ # return self
828
+
829
+ def __getitem__(self, index):
830
+ if self.image_weights:
831
+ index = self.indices[index]
832
+
833
+ hyp = self.hyp
834
+ mosaic = self.mosaic and random.random() < hyp['mosaic']
835
+ if mosaic:
836
+ # Load mosaic
837
+ #img, labels = load_mosaic(self, index)
838
+ img, labels = load_mosaic9(self, index)
839
+ shapes = None
840
+
841
+ # MixUp https://arxiv.org/pdf/1710.09412.pdf
842
+ if random.random() < hyp['mixup']:
843
+ #img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1))
844
+ img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1))
845
+ r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0
846
+ img = (img * r + img2 * (1 - r)).astype(np.uint8)
847
+ labels = np.concatenate((labels, labels2), 0)
848
+
849
+ else:
850
+ # Load image
851
+ img, (h0, w0), (h, w) = load_image(self, index)
852
+
853
+ # Letterbox
854
+ shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
855
+ img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
856
+ shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
857
+
858
+ # Load labels
859
+ labels = []
860
+ x = self.labels[index]
861
+ if x.size > 0:
862
+ # Normalized xywh to pixel xyxy format
863
+ labels = x.copy()
864
+ labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
865
+ labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
866
+ labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
867
+ labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
868
+
869
+ if self.augment:
870
+ # Augment imagespace
871
+ if not mosaic:
872
+ img, labels = random_perspective(img, labels,
873
+ degrees=hyp['degrees'],
874
+ translate=hyp['translate'],
875
+ scale=hyp['scale'],
876
+ shear=hyp['shear'],
877
+ perspective=hyp['perspective'])
878
+
879
+ # Augment colorspace
880
+ augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
881
+
882
+ # Apply cutouts
883
+ # if random.random() < 0.9:
884
+ # labels = cutout(img, labels)
885
+
886
+ nL = len(labels) # number of labels
887
+ if nL:
888
+ labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh
889
+ labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1
890
+ labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1
891
+
892
+ if self.augment:
893
+ # flip up-down
894
+ if random.random() < hyp['flipud']:
895
+ img = np.flipud(img)
896
+ if nL:
897
+ labels[:, 2] = 1 - labels[:, 2]
898
+
899
+ # flip left-right
900
+ if random.random() < hyp['fliplr']:
901
+ img = np.fliplr(img)
902
+ if nL:
903
+ labels[:, 1] = 1 - labels[:, 1]
904
+
905
+ labels_out = torch.zeros((nL, 6))
906
+ if nL:
907
+ labels_out[:, 1:] = torch.from_numpy(labels)
908
+
909
+ # Convert
910
+ img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
911
+ img = np.ascontiguousarray(img)
912
+
913
+ return torch.from_numpy(img), labels_out, self.img_files[index], shapes
914
+
915
+ @staticmethod
916
+ def collate_fn(batch):
917
+ img, label, path, shapes = zip(*batch) # transposed
918
+ for i, l in enumerate(label):
919
+ l[:, 0] = i # add target image index for build_targets()
920
+ return torch.stack(img, 0), torch.cat(label, 0), path, shapes
921
+
922
+
923
+ # Ancillary functions --------------------------------------------------------------------------------------------------
924
+ def load_image(self, index):
925
+ # loads 1 image from dataset, returns img, original hw, resized hw
926
+ img = self.imgs[index]
927
+ if img is None: # not cached
928
+ path = self.img_files[index]
929
+ img = cv2.imread(path) # BGR
930
+ assert img is not None, 'Image Not Found ' + path
931
+ h0, w0 = img.shape[:2] # orig hw
932
+ r = self.img_size / max(h0, w0) # resize image to img_size
933
+ if r != 1: # always resize down, only resize up if training with augmentation
934
+ interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
935
+ img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
936
+ return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized
937
+ else:
938
+ return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
939
+
940
+
941
+ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
942
+ r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
943
+ hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
944
+ dtype = img.dtype # uint8
945
+
946
+ x = np.arange(0, 256, dtype=np.int16)
947
+ lut_hue = ((x * r[0]) % 180).astype(dtype)
948
+ lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
949
+ lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
950
+
951
+ img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
952
+ cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
953
+
954
+ # Histogram equalization
955
+ # if random.random() < 0.2:
956
+ # for i in range(3):
957
+ # img[:, :, i] = cv2.equalizeHist(img[:, :, i])
958
+
959
+
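Because `augment_hsv` edits the array in place through three lookup tables, calling it is a one-liner; a small sketch on a placeholder image path:

```python
import cv2
from asone.detectors.yolor.utils.datasets import augment_hsv

img = cv2.imread('example.jpg')   # placeholder path; any BGR uint8 image works
before = img.copy()

augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4)   # modifies img in place, returns None

print('pixels changed:', int((before != img).sum()))
```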
960
+ def load_mosaic(self, index):
961
+ # loads images in a mosaic
962
+
963
+ labels4 = []
964
+ s = self.img_size
965
+ yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
966
+ indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices
967
+ for i, index in enumerate(indices):
968
+ # Load image
969
+ img, _, (h, w) = load_image(self, index)
970
+
971
+ # place img in img4
972
+ if i == 0: # top left
973
+ img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
974
+ x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
975
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
976
+ elif i == 1: # top right
977
+ x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
978
+ x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
979
+ elif i == 2: # bottom left
980
+ x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
981
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
982
+ elif i == 3: # bottom right
983
+ x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
984
+ x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
985
+
986
+ img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
987
+ padw = x1a - x1b
988
+ padh = y1a - y1b
989
+
990
+ # Labels
991
+ x = self.labels[index]
992
+ labels = x.copy()
993
+ if x.size > 0: # Normalized xywh to pixel xyxy format
994
+ labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
995
+ labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
996
+ labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
997
+ labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
998
+ labels4.append(labels)
999
+
1000
+ # Concat/clip labels
1001
+ if len(labels4):
1002
+ labels4 = np.concatenate(labels4, 0)
1003
+ np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_perspective
1004
+ # img4, labels4 = replicate(img4, labels4) # replicate
1005
+
1006
+ # Augment
1007
+ img4, labels4 = random_perspective(img4, labels4,
1008
+ degrees=self.hyp['degrees'],
1009
+ translate=self.hyp['translate'],
1010
+ scale=self.hyp['scale'],
1011
+ shear=self.hyp['shear'],
1012
+ perspective=self.hyp['perspective'],
1013
+ border=self.mosaic_border) # border to remove
1014
+
1015
+ return img4, labels4
1016
+
1017
+
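The label block inside `load_mosaic` converts normalized `[class, xc, yc, w, h]` rows into pixel `xyxy` on the 2s x 2s mosaic canvas by scaling with the resized tile size and shifting by the tile offset (`padw`, `padh`). A toy sketch of that arithmetic with made-up numbers:

```python
import numpy as np

w, h = 640, 480          # resized tile size (illustrative)
padw, padh = 320, 160    # where this tile starts inside the mosaic canvas (illustrative)

x = np.array([[0, 0.5, 0.5, 0.25, 0.5]])   # [class, xc, yc, bw, bh], normalized to the tile
labels = x.copy()
labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw   # x1
labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh   # y1
labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw   # x2
labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh   # y2
print(labels)   # [[  0. 560. 280. 720. 520.]]
```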
1018
+ def load_mosaic9(self, index):
1019
+ # loads images in a 9-mosaic
1020
+
1021
+ labels9 = []
1022
+ s = self.img_size
1023
+ indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(8)] # 8 additional image indices
1024
+ for i, index in enumerate(indices):
1025
+ # Load image
1026
+ img, _, (h, w) = load_image(self, index)
1027
+
1028
+ # place img in img9
1029
+ if i == 0: # center
1030
+ img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
1031
+ h0, w0 = h, w
1032
+ c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates
1033
+ elif i == 1: # top
1034
+ c = s, s - h, s + w, s
1035
+ elif i == 2: # top right
1036
+ c = s + wp, s - h, s + wp + w, s
1037
+ elif i == 3: # right
1038
+ c = s + w0, s, s + w0 + w, s + h
1039
+ elif i == 4: # bottom right
1040
+ c = s + w0, s + hp, s + w0 + w, s + hp + h
1041
+ elif i == 5: # bottom
1042
+ c = s + w0 - w, s + h0, s + w0, s + h0 + h
1043
+ elif i == 6: # bottom left
1044
+ c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
1045
+ elif i == 7: # left
1046
+ c = s - w, s + h0 - h, s, s + h0
1047
+ elif i == 8: # top left
1048
+ c = s - w, s + h0 - hp - h, s, s + h0 - hp
1049
+
1050
+ padx, pady = c[:2]
1051
+ x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords
1052
+
1053
+ # Labels
1054
+ x = self.labels[index]
1055
+ labels = x.copy()
1056
+ if x.size > 0: # Normalized xywh to pixel xyxy format
1057
+ labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padx
1058
+ labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + pady
1059
+ labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padx
1060
+ labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + pady
1061
+ labels9.append(labels)
1062
+
1063
+ # Image
1064
+ img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax]
1065
+ hp, wp = h, w # height, width previous
1066
+
1067
+ # Offset
1068
+ yc, xc = [int(random.uniform(0, s)) for x in self.mosaic_border] # mosaic center x, y
1069
+ img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]
1070
+
1071
+ # Concat/clip labels
1072
+ if len(labels9):
1073
+ labels9 = np.concatenate(labels9, 0)
1074
+ labels9[:, [1, 3]] -= xc
1075
+ labels9[:, [2, 4]] -= yc
1076
+
1077
+ np.clip(labels9[:, 1:], 0, 2 * s, out=labels9[:, 1:]) # use with random_perspective
1078
+ # img9, labels9 = replicate(img9, labels9) # replicate
1079
+
1080
+ # Augment
1081
+ img9, labels9 = random_perspective(img9, labels9,
1082
+ degrees=self.hyp['degrees'],
1083
+ translate=self.hyp['translate'],
1084
+ scale=self.hyp['scale'],
1085
+ shear=self.hyp['shear'],
1086
+ perspective=self.hyp['perspective'],
1087
+ border=self.mosaic_border) # border to remove
1088
+
1089
+ return img9, labels9
1090
+
1091
+
1092
+ def replicate(img, labels):
1093
+ # Replicate labels
1094
+ h, w = img.shape[:2]
1095
+ boxes = labels[:, 1:].astype(int)
1096
+ x1, y1, x2, y2 = boxes.T
1097
+ s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
1098
+ for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
1099
+ x1b, y1b, x2b, y2b = boxes[i]
1100
+ bh, bw = y2b - y1b, x2b - x1b
1101
+ yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
1102
+ x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
1103
+ img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
1104
+ labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
1105
+
1106
+ return img, labels
1107
+
1108
+
1109
+ def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, auto_size=32):
1110
+ # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
1111
+ shape = img.shape[:2] # current shape [height, width]
1112
+ if isinstance(new_shape, int):
1113
+ new_shape = (new_shape, new_shape)
1114
+
1115
+ # Scale ratio (new / old)
1116
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
1117
+ if not scaleup: # only scale down, do not scale up (for better test mAP)
1118
+ r = min(r, 1.0)
1119
+
1120
+ # Compute padding
1121
+ ratio = r, r # width, height ratios
1122
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
1123
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
1124
+ if auto: # minimum rectangle
1125
+ dw, dh = np.mod(dw, auto_size), np.mod(dh, auto_size) # wh padding
1126
+ elif scaleFill: # stretch
1127
+ dw, dh = 0.0, 0.0
1128
+ new_unpad = (new_shape[1], new_shape[0])
1129
+ ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
1130
+
1131
+ dw /= 2 # divide padding into 2 sides
1132
+ dh /= 2
1133
+
1134
+ if shape[::-1] != new_unpad: # resize
1135
+ img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
1136
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
1137
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
1138
+ img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
1139
+ return img, ratio, (dw, dh)
1140
+
1141
+
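A quick shape check of `letterbox` on a dummy 1080p frame resized for a 640-pixel model input (no file I/O; the numbers follow directly from the formulas above):

```python
import numpy as np
from asone.detectors.yolor.utils.datasets import letterbox

frame = np.zeros((1080, 1920, 3), dtype=np.uint8)   # dummy HD frame
img, ratio, (dw, dh) = letterbox(frame, new_shape=640, auto=True)

print(img.shape)        # (384, 640, 3): scaled by 1/3, then padded up to a multiple of 32
print(ratio, (dw, dh))  # (0.333..., 0.333...) (0.0, 12.0): 12 px of padding top and bottom
```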
1142
+ def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
1143
+ # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
1144
+ # targets = [cls, xyxy]
1145
+
1146
+ height = img.shape[0] + border[0] * 2 # shape(h,w,c)
1147
+ width = img.shape[1] + border[1] * 2
1148
+
1149
+ # Center
1150
+ C = np.eye(3)
1151
+ C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
1152
+ C[1, 2] = -img.shape[0] / 2 # y translation (pixels)
1153
+
1154
+ # Perspective
1155
+ P = np.eye(3)
1156
+ P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
1157
+ P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
1158
+
1159
+ # Rotation and Scale
1160
+ R = np.eye(3)
1161
+ a = random.uniform(-degrees, degrees)
1162
+ # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
1163
+ s = random.uniform(1 - scale, 1 + scale)
1164
+ # s = 2 ** random.uniform(-scale, scale)
1165
+ R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
1166
+
1167
+ # Shear
1168
+ S = np.eye(3)
1169
+ S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
1170
+ S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
1171
+
1172
+ # Translation
1173
+ T = np.eye(3)
1174
+ T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
1175
+ T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
1176
+
1177
+ # Combined rotation matrix
1178
+ M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
1179
+ if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
1180
+ if perspective:
1181
+ img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
1182
+ else: # affine
1183
+ img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
1184
+
1185
+ # Visualize
1186
+ # import matplotlib.pyplot as plt
1187
+ # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
1188
+ # ax[0].imshow(img[:, :, ::-1]) # base
1189
+ # ax[1].imshow(img2[:, :, ::-1]) # warped
1190
+
1191
+ # Transform label coordinates
1192
+ n = len(targets)
1193
+ if n:
1194
+ # warp points
1195
+ xy = np.ones((n * 4, 3))
1196
+ xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
1197
+ xy = xy @ M.T # transform
1198
+ if perspective:
1199
+ xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale
1200
+ else: # affine
1201
+ xy = xy[:, :2].reshape(n, 8)
1202
+
1203
+ # create new boxes
1204
+ x = xy[:, [0, 2, 4, 6]]
1205
+ y = xy[:, [1, 3, 5, 7]]
1206
+ xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
1207
+
1208
+ # # apply angle-based reduction of bounding boxes
1209
+ # radians = a * math.pi / 180
1210
+ # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
1211
+ # x = (xy[:, 2] + xy[:, 0]) / 2
1212
+ # y = (xy[:, 3] + xy[:, 1]) / 2
1213
+ # w = (xy[:, 2] - xy[:, 0]) * reduction
1214
+ # h = (xy[:, 3] - xy[:, 1]) * reduction
1215
+ # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
1216
+
1217
+ # clip boxes
1218
+ xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
1219
+ xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
1220
+
1221
+ # filter candidates
1222
+ i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
1223
+ targets = targets[i]
1224
+ targets[:, 1:5] = xy[i]
1225
+
1226
+ return img, targets
1227
+
1228
+
1229
+ def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1): # box1(4,n), box2(4,n)
1230
+ # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
1231
+ w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
1232
+ w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
1233
+ ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio
1234
+ return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr) # candidates
1235
+
1236
+
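`box_candidates` compares each box before and after augmentation (both as 4xn `xyxy` arrays) and keeps boxes that stay at least 2 px wide and high, retain more than 10% of their area, and have an aspect ratio under 20. A small sketch:

```python
import numpy as np
from asone.detectors.yolor.utils.datasets import box_candidates

box1 = np.array([[10.0, 10.0, 110.0, 60.0]]).T   # before augmentation, shape (4, 1)
box2 = np.array([[12.0, 11.0, 105.0, 58.0]]).T   # after augmentation, shape (4, 1)

print(box_candidates(box1, box2))   # [ True] -> the box survives all three filters
```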
1237
+ def cutout(image, labels):
1238
+ # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
1239
+ h, w = image.shape[:2]
1240
+
1241
+ def bbox_ioa(box1, box2):
1242
+ # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
1243
+ box2 = box2.transpose()
1244
+
1245
+ # Get the coordinates of bounding boxes
1246
+ b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
1247
+ b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
1248
+
1249
+ # Intersection area
1250
+ inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
1251
+ (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
1252
+
1253
+ # box2 area
1254
+ box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
1255
+
1256
+ # Intersection over box2 area
1257
+ return inter_area / box2_area
1258
+
1259
+ # create random masks
1260
+ scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
1261
+ for s in scales:
1262
+ mask_h = random.randint(1, int(h * s))
1263
+ mask_w = random.randint(1, int(w * s))
1264
+
1265
+ # box
1266
+ xmin = max(0, random.randint(0, w) - mask_w // 2)
1267
+ ymin = max(0, random.randint(0, h) - mask_h // 2)
1268
+ xmax = min(w, xmin + mask_w)
1269
+ ymax = min(h, ymin + mask_h)
1270
+
1271
+ # apply random color mask
1272
+ image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
1273
+
1274
+ # return unobscured labels
1275
+ if len(labels) and s > 0.03:
1276
+ box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
1277
+ ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
1278
+ labels = labels[ioa < 0.60] # remove >60% obscured labels
1279
+
1280
+ return labels
1281
+
1282
+
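`cutout` paints random rectangles over the image in place and returns only the labels that stay mostly unobscured; a minimal sketch on a dummy frame (results vary because the masks are random):

```python
import numpy as np
from asone.detectors.yolor.utils.datasets import cutout

img = np.full((480, 640, 3), 127, dtype=np.uint8)       # dummy gray frame
labels = np.array([[0, 100.0, 100.0, 200.0, 200.0]])    # one box: [class, x1, y1, x2, y2]

kept = cutout(img, labels)
print(len(kept))   # 1 if the box stayed visible, 0 if a mask covered more than 60% of it
```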
1283
+ def create_folder(path='./new'):
1284
+ # Create folder
1285
+ if os.path.exists(path):
1286
+ shutil.rmtree(path) # delete output folder
1287
+ os.makedirs(path) # make new output folder
1288
+
1289
+
1290
+ def flatten_recursive(path='../coco128'):
1291
+ # Flatten a recursive directory by bringing all files to top level
1292
+ new_path = Path(path + '_flat')
1293
+ create_folder(new_path)
1294
+ for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
1295
+ shutil.copyfile(file, new_path / Path(file).name)
1296
+
1297
+
asone/detectors/yolor/utils/export.py ADDED
@@ -0,0 +1,80 @@
1
+ import argparse
2
+
3
+ import torch
4
+ from asone.detectors.yolor.models.models import *
5
+ from asone.detectors.yolor.utils.google_utils import attempt_download
6
+
7
+ if __name__ == '__main__':
8
+ parser = argparse.ArgumentParser()
9
+ parser.add_argument('--weights', type=str, default='./yolov4.pt', help='weights path')
10
+ parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
11
+ parser.add_argument('--batch-size', type=int, default=1, help='batch size')
12
+ parser.add_argument('--cfg', type=str, default='cfg/yolor_p6.cfg', help='*.cfg path')
13
+ opt = parser.parse_args()
14
+ opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
15
+ # print(opt)
16
+
17
+ # Input
18
+ img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection
19
+
20
+ # Load PyTorch model
21
+ attempt_download(opt.weights)
22
+ # print(ad)
23
+ # model = Darknet(cfg, ).cuda()
24
+ model.load_state_dict(torch.load(opt.weights, map_location=device)['model'])
25
+ print(type(model))
26
+ print("*"*50)
27
+ exit()
28
+ model.eval()
29
+ model.model[-1].export = True # set Detect() layer export=True
30
+ y = model(img) # dry run
31
+
32
+ # print("-------------------")
33
+ # model = Darknet(cfg, imgsz).cuda()
34
+ # model.load_state_dict(torch.load(weights[0], map_location=device)['model'])
35
+ #model = attempt_load(weights, map_location=device) # load FP32 model
36
+ #imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
37
+ # model.to(device).eval()
38
+ # TorchScript export
39
+ try:
40
+ print('\nStarting TorchScript export with torch %s...' % torch.__version__)
41
+ f = opt.weights.replace('.pt', '.torchscript.pt') # filename
42
+ ts = torch.jit.trace(model, img)
43
+ ts.save(f)
44
+ print('TorchScript export success, saved as %s' % f)
45
+ except Exception as e:
46
+ print('TorchScript export failure: %s' % e)
47
+
48
+ # ONNX export
49
+ try:
50
+ import onnx
51
+
52
+ print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
53
+ f = opt.weights.replace('.pt', '.onnx') # filename
54
+ model.fuse() # only for ONNX
55
+ torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
56
+ output_names=['classes', 'boxes'] if y is None else ['output'])
57
+
58
+ # Checks
59
+ onnx_model = onnx.load(f) # load onnx model
60
+ onnx.checker.check_model(onnx_model) # check onnx model
61
+ print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
62
+ print('ONNX export success, saved as %s' % f)
63
+ except Exception as e:
64
+ print('ONNX export failure: %s' % e)
65
+
66
+ # CoreML export
67
+ try:
68
+ import coremltools as ct
69
+
70
+ print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
71
+ # convert model from torchscript and apply pixel scaling as per detect.py
72
+ model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
73
+ f = opt.weights.replace('.pt', '.mlmodel') # filename
74
+ model.save(f)
75
+ print('CoreML export success, saved as %s' % f)
76
+ except Exception as e:
77
+ print('CoreML export failure: %s' % e)
78
+
79
+ # Finish
80
+ print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
asone/detectors/yolor/utils/general.py ADDED
@@ -0,0 +1,449 @@
1
+ # General utils
2
+
3
+ import glob
4
+ import logging
5
+ import math
6
+ import os
7
+ import platform
8
+ import random
9
+ import re
10
+ import subprocess
11
+ import time
12
+ from pathlib import Path
13
+
14
+ import cv2
15
+ import matplotlib
16
+ import numpy as np
17
+ import torch
18
+ import yaml
19
+
20
+ from asone.detectors.yolor.utils.google_utils import gsutil_getsize
21
+ from asone.detectors.yolor.utils.metrics import fitness
22
+ from asone.detectors.yolor.utils.torch_utils import init_torch_seeds
23
+
24
+ # Set printoptions
25
+ torch.set_printoptions(linewidth=320, precision=5, profile='long')
26
+ np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
27
+ matplotlib.rc('font', **{'size': 11})
28
+
29
+ # Prevent OpenCV from multithreading (to use PyTorch DataLoader)
30
+ cv2.setNumThreads(0)
31
+
32
+
33
+ def set_logging(rank=-1):
34
+ logging.basicConfig(
35
+ format="%(message)s",
36
+ level=logging.INFO if rank in [-1, 0] else logging.WARN)
37
+
38
+
39
+ def init_seeds(seed=0):
40
+ random.seed(seed)
41
+ np.random.seed(seed)
42
+ init_torch_seeds(seed)
43
+
44
+
45
+ def get_latest_run(search_dir='.'):
46
+ # Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
47
+ last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
48
+ return max(last_list, key=os.path.getctime) if last_list else ''
49
+
50
+
51
+ def check_git_status():
52
+ # Suggest 'git pull' if repo is out of date
53
+ if platform.system() in ['Linux', 'Darwin'] and not os.path.isfile('/.dockerenv'):
54
+ s = subprocess.check_output('if [ -d .git ]; then git fetch && git status -uno; fi', shell=True).decode('utf-8')
55
+ if 'Your branch is behind' in s:
56
+ print(s[s.find('Your branch is behind'):s.find('\n\n')] + '\n')
57
+
58
+
59
+ def check_img_size(img_size, s=32):
60
+ # Verify img_size is a multiple of stride s
61
+ new_size = make_divisible(img_size, int(s)) # ceil gs-multiple
62
+ if new_size != img_size:
63
+ print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
64
+ return new_size
65
+
66
+
67
+ def check_file(file):
68
+ # Search for file if not found
69
+ if os.path.isfile(file) or file == '':
70
+ return file
71
+ else:
72
+ files = glob.glob('./**/' + file, recursive=True) # find file
73
+ assert len(files), 'File Not Found: %s' % file # assert file was found
74
+ assert len(files) == 1, "Multiple files match '%s', specify exact path: %s" % (file, files) # assert unique
75
+ return files[0] # return file
76
+
77
+
78
+ def check_dataset(dict):
79
+ # Download dataset if not found locally
80
+ val, s = dict.get('val'), dict.get('download')
81
+ if val and len(val):
82
+ val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
83
+ if not all(x.exists() for x in val):
84
+ print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
85
+ if s and len(s): # download script
86
+ print('Downloading %s ...' % s)
87
+ if s.startswith('http') and s.endswith('.zip'): # URL
88
+ f = Path(s).name # filename
89
+ torch.hub.download_url_to_file(s, f)
90
+ r = os.system('unzip -q %s -d ../ && rm %s' % (f, f)) # unzip
91
+ else: # bash script
92
+ r = os.system(s)
93
+ print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure')) # analyze return value
94
+ else:
95
+ raise Exception('Dataset not found.')
96
+
97
+
98
+ def make_divisible(x, divisor):
99
+ # Returns x evenly divisible by divisor
100
+ return math.ceil(x / divisor) * divisor
101
+
102
+
103
+ def labels_to_class_weights(labels, nc=80):
104
+ # Get class weights (inverse frequency) from training labels
105
+ if labels[0] is None: # no labels loaded
106
+ return torch.Tensor()
107
+
108
+ labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO
109
+ classes = labels[:, 0].astype(np.int) # labels = [class xywh]
110
+ weights = np.bincount(classes, minlength=nc) # occurrences per class
111
+
112
+ # Prepend gridpoint count (for uCE training)
113
+ # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image
114
+ # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start
115
+
116
+ weights[weights == 0] = 1 # replace empty bins with 1
117
+ weights = 1 / weights # number of targets per class
118
+ weights /= weights.sum() # normalize
119
+ return torch.from_numpy(weights)
120
+
121
+
122
+ def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
123
+ # Produces image weights based on class mAPs
124
+ n = len(labels)
125
+ class_counts = np.array([np.bincount(labels[i][:, 0].astype(np.int), minlength=nc) for i in range(n)])
126
+ image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
127
+ # index = random.choices(range(n), weights=image_weights, k=1) # weight image sample
128
+ return image_weights
129
+
130
+
131
+ def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
132
+ # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
133
+ # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
134
+ # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
135
+ # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
136
+ # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet
137
+ x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
138
+ 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
139
+ 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
140
+ return x
141
+
142
+
143
+ def xyxy2xywh(x):
144
+ # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
145
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
146
+ y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
147
+ y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
148
+ y[:, 2] = x[:, 2] - x[:, 0] # width
149
+ y[:, 3] = x[:, 3] - x[:, 1] # height
150
+ return y
151
+
152
+
153
+ def xywh2xyxy(x):
154
+ # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
155
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
156
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
157
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
158
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
159
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
160
+ return y
161
+
162
+
163
+ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
164
+ # Rescale coords (xyxy) from img1_shape to img0_shape
165
+ if ratio_pad is None: # calculate from img0_shape
166
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
167
+ pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
168
+ else:
169
+ gain = ratio_pad[0][0]
170
+ pad = ratio_pad[1]
171
+
172
+ coords[:, [0, 2]] -= pad[0] # x padding
173
+ coords[:, [1, 3]] -= pad[1] # y padding
174
+ coords[:, :4] /= gain
175
+ clip_coords(coords, img0_shape)
176
+ return coords
177
+
178
+
179
+ def clip_coords(boxes, img_shape):
180
+ # Clip bounding xyxy bounding boxes to image shape (height, width)
181
+ boxes[:, 0].clamp_(0, img_shape[1]) # x1
182
+ boxes[:, 1].clamp_(0, img_shape[0]) # y1
183
+ boxes[:, 2].clamp_(0, img_shape[1]) # x2
184
+ boxes[:, 3].clamp_(0, img_shape[0]) # y2
185
+
186
+
187
+ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, EIoU=False, ECIoU=False, eps=1e-9):
188
+ # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
189
+ box2 = box2.T
190
+
191
+ # Get the coordinates of bounding boxes
192
+ if x1y1x2y2: # x1, y1, x2, y2 = box1
193
+ b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
194
+ b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
195
+ else: # transform from xywh to xyxy
196
+ b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
197
+ b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
198
+ b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
199
+ b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
200
+
201
+ # Intersection area
202
+ inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
203
+ (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
204
+
205
+ # Union Area
206
+ w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
207
+ w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
208
+ union = w1 * h1 + w2 * h2 - inter + eps
209
+
210
+ iou = inter / union
211
+ if GIoU or DIoU or CIoU or EIoU or ECIoU:
212
+ cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width
213
+ ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height
214
+ if CIoU or DIoU or EIoU or ECIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
215
+ c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared
216
+ rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
217
+ (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared
218
+ if DIoU:
219
+ return iou - rho2 / c2 # DIoU
220
+ elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
221
+ v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
222
+ with torch.no_grad():
223
+ alpha = v / ((1 + eps) - iou + v)
224
+ return iou - (rho2 / c2 + v * alpha) # CIoU
225
+ elif EIoU: # Efficient IoU https://arxiv.org/abs/2101.08158
226
+ rho3 = (w1-w2) **2
227
+ c3 = cw ** 2 + eps
228
+ rho4 = (h1-h2) **2
229
+ c4 = ch ** 2 + eps
230
+ return iou - rho2 / c2 - rho3 / c3 - rho4 / c4 # EIoU
231
+ elif ECIoU:
232
+ v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
233
+ with torch.no_grad():
234
+ alpha = v / ((1 + eps) - iou + v)
235
+ rho3 = (w1-w2) **2
236
+ c3 = cw ** 2 + eps
237
+ rho4 = (h1-h2) **2
238
+ c4 = ch ** 2 + eps
239
+ return iou - v * alpha - rho2 / c2 - rho3 / c3 - rho4 / c4 # ECIoU
240
+ else: # GIoU https://arxiv.org/pdf/1902.09630.pdf
241
+ c_area = cw * ch + eps # convex area
242
+ return iou - (c_area - union) / c_area # GIoU
243
+ else:
244
+ return iou # IoU
245
+
246
+
247
+ def box_iou(box1, box2):
248
+ # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
249
+ """
250
+ Return intersection-over-union (Jaccard index) of boxes.
251
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
252
+ Arguments:
253
+ box1 (Tensor[N, 4])
254
+ box2 (Tensor[M, 4])
255
+ Returns:
256
+ iou (Tensor[N, M]): the NxM matrix containing the pairwise
257
+ IoU values for every element in boxes1 and boxes2
258
+ """
259
+
260
+ def box_area(box):
261
+ # box = 4xn
262
+ return (box[2] - box[0]) * (box[3] - box[1])
263
+
264
+ area1 = box_area(box1.T)
265
+ area2 = box_area(box2.T)
266
+
267
+ # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
268
+ inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
269
+ return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
270
+
271
+
272
+ def wh_iou(wh1, wh2):
273
+ # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
274
+ wh1 = wh1[:, None] # [N,1,2]
275
+ wh2 = wh2[None] # [1,M,2]
276
+ inter = torch.min(wh1, wh2).prod(2) # [N,M]
277
+ return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter)
278
+
279
+
280
+ def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False):
281
+ """Performs Non-Maximum Suppression (NMS) on inference results
282
+
283
+ Returns:
284
+ detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
285
+ """
286
+
287
+ nc = prediction[0].shape[1] - 5 # number of classes
288
+ xc = prediction[..., 4] > conf_thres # candidates
289
+
290
+ # Settings
291
+ min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
292
+ max_det = 300 # maximum number of detections per image
293
+ time_limit = 10.0 # seconds to quit after
294
+ redundant = True # require redundant detections
295
+ multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
296
+
297
+ t = time.time()
298
+ output = [torch.zeros(0, 6)] * prediction.shape[0]
299
+ for xi, x in enumerate(prediction): # image index, image inference
300
+ # Apply constraints
301
+ # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
302
+ x = x[xc[xi]] # confidence
303
+
304
+ # If none remain process next image
305
+ if not x.shape[0]:
306
+ continue
307
+
308
+ # Compute conf
309
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
310
+
311
+ # Box (center x, center y, width, height) to (x1, y1, x2, y2)
312
+ box = xywh2xyxy(x[:, :4])
313
+
314
+ # Detections matrix nx6 (xyxy, conf, cls)
315
+ if multi_label:
316
+ i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
317
+ x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
318
+ else: # best class only
319
+ conf, j = x[:, 5:].max(1, keepdim=True)
320
+ x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
321
+
322
+ # Filter by class
323
+ if classes:
324
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
325
+
326
+ # Apply finite constraint
327
+ # if not torch.isfinite(x).all():
328
+ # x = x[torch.isfinite(x).all(1)]
329
+
330
+ # If none remain process next image
331
+ n = x.shape[0] # number of boxes
332
+ if not n:
333
+ continue
334
+
335
+ # Sort by confidence
336
+ # x = x[x[:, 4].argsort(descending=True)]
337
+
338
+ # Batched NMS
339
+ c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
340
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
341
+ i = torch.ops.torchvision.nms(boxes, scores, iou_thres)
342
+ if i.shape[0] > max_det: # limit detections
343
+ i = i[:max_det]
344
+ if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
345
+ # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
346
+ iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
347
+ weights = iou * scores[None] # box weights
348
+ x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
349
+ if redundant:
350
+ i = i[iou.sum(1) > 1] # require redundancy
351
+
352
+ output[xi] = x[i]
353
+ if (time.time() - t) > time_limit:
354
+ break # time limit exceeded
355
+
356
+ return output
357
+
358
+
359
+ def strip_optimizer(f='weights/best.pt', s=''): # from utils.general import *; strip_optimizer()
360
+ # Strip optimizer from 'f' to finalize training, optionally save as 's'
361
+ x = torch.load(f, map_location=torch.device('cpu'))
362
+ x['optimizer'] = None
363
+ x['training_results'] = None
364
+ x['epoch'] = -1
365
+ #x['model'].half() # to FP16
366
+ #for p in x['model'].parameters():
367
+ # p.requires_grad = False
368
+ torch.save(x, s or f)
369
+ mb = os.path.getsize(s or f) / 1E6 # filesize
370
+ print('Optimizer stripped from %s,%s %.1fMB' % (f, (' saved as %s,' % s) if s else '', mb))
371
+
372
+
373
+ def print_mutation(hyp, results, yaml_file='hyp_evolved.yaml', bucket=''):
374
+ # Print mutation results to evolve.txt (for use with train.py --evolve)
375
+ a = '%10s' * len(hyp) % tuple(hyp.keys()) # hyperparam keys
376
+ b = '%10.3g' * len(hyp) % tuple(hyp.values()) # hyperparam values
377
+ c = '%10.4g' * len(results) % results # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3)
378
+ print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))
379
+
380
+ if bucket:
381
+ url = 'gs://%s/evolve.txt' % bucket
382
+ if gsutil_getsize(url) > (os.path.getsize('evolve.txt') if os.path.exists('evolve.txt') else 0):
383
+ os.system('gsutil cp %s .' % url) # download evolve.txt if larger than local
384
+
385
+ with open('evolve.txt', 'a') as f: # append result
386
+ f.write(c + b + '\n')
387
+ x = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0) # load unique rows
388
+ x = x[np.argsort(-fitness(x))] # sort
389
+ np.savetxt('evolve.txt', x, '%10.3g') # save sort by fitness
390
+
391
+ # Save yaml
392
+ for i, k in enumerate(hyp.keys()):
393
+ hyp[k] = float(x[0, i + 7])
394
+ with open(yaml_file, 'w') as f:
395
+ results = tuple(x[0, :7])
396
+ c = '%10.4g' * len(results) % results # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3)
397
+ f.write('# Hyperparameter Evolution Results\n# Generations: %g\n# Metrics: ' % len(x) + c + '\n\n')
398
+ yaml.dump(hyp, f, sort_keys=False)
399
+
400
+ if bucket:
401
+ os.system('gsutil cp evolve.txt %s gs://%s' % (yaml_file, bucket)) # upload
402
+
403
+
404
+ def apply_classifier(x, model, img, im0):
405
+ # applies a second stage classifier to yolo outputs
406
+ im0 = [im0] if isinstance(im0, np.ndarray) else im0
407
+ for i, d in enumerate(x): # per image
408
+ if d is not None and len(d):
409
+ d = d.clone()
410
+
411
+ # Reshape and pad cutouts
412
+ b = xyxy2xywh(d[:, :4]) # boxes
413
+ b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square
414
+ b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad
415
+ d[:, :4] = xywh2xyxy(b).long()
416
+
417
+ # Rescale boxes from img_size to im0 size
418
+ scale_coords(img.shape[2:], d[:, :4], im0[i].shape)
419
+
420
+ # Classes
421
+ pred_cls1 = d[:, 5].long()
422
+ ims = []
423
+ for j, a in enumerate(d): # per item
424
+ cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
425
+ im = cv2.resize(cutout, (224, 224)) # BGR
426
+ # cv2.imwrite('test%i.jpg' % j, cutout)
427
+
428
+ im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
429
+ im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32
430
+ im /= 255.0 # 0 - 255 to 0.0 - 1.0
431
+ ims.append(im)
432
+
433
+ pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction
434
+ x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections
435
+
436
+ return x
437
+
438
+
439
+ def increment_path(path, exist_ok=True, sep=''):
440
+ # Increment path, i.e. runs/exp --> runs/exp{sep}0, runs/exp{sep}1 etc.
441
+ path = Path(path) # os-agnostic
442
+ if (path.exists() and exist_ok) or (not path.exists()):
443
+ return str(path)
444
+ else:
445
+ dirs = glob.glob(f"{path}{sep}*") # similar paths
446
+ matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs]
447
+ i = [int(m.groups()[0]) for m in matches if m] # indices
448
+ n = max(i) + 1 if i else 2 # increment number
449
+ return f"{path}{sep}{n}" # update path
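A minimal sketch (not part of the diff above) of the class-offset trick that the batched NMS code in general.py relies on: each box is shifted by class_id * max_wh before a single torchvision.ops.nms call, so boxes of different classes can never suppress each other. All tensors below are illustrative.

    import torch
    import torchvision

    def batched_nms_sketch(boxes, scores, classes, iou_thres=0.45, max_wh=4096):
        # boxes: (n, 4) in xyxy, scores: (n,), classes: (n,) integer class ids
        offsets = classes.float() * max_wh      # one large offset per class
        keep = torchvision.ops.nms(boxes + offsets[:, None], scores, iou_thres)
        return keep

    boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [0., 0., 10., 10.]])
    scores = torch.tensor([0.9, 0.8, 0.7])
    classes = torch.tensor([0, 0, 1])           # third box belongs to another class
    print(batched_nms_sketch(boxes, scores, classes))  # tensor([0, 2]): same-class duplicate suppressed
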
asone/detectors/yolor/utils/google_utils.py ADDED
@@ -0,0 +1,132 @@
1
+ # Google utils: https://cloud.google.com/storage/docs/reference/libraries
2
+
3
+ import os
4
+ import platform
5
+ import subprocess
6
+ import time
7
+ from pathlib import Path
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+
12
+
13
+ def gsutil_getsize(url=''):
14
+ # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
15
+ s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8')
16
+ return int(s.split(' ')[0]) if len(s) else 0 # bytes (int(), not eval(), on shell output)
17
+
18
+
19
+ def attempt_download(weights):
20
+ # Attempt to download pretrained weights if not found locally
21
+ weights = weights.strip().replace("'", '')
22
+ file = Path(weights).name
23
+
24
+ msg = weights + ' missing, try downloading from https://github.com/WongKinYiu/yolor/releases/'
25
+ models = ['yolor_p6.pt', 'yolor_w6.pt'] # available models
26
+
27
+ if file in models and not os.path.isfile(weights):
28
+
29
+ try: # GitHub
30
+ url = 'https://github.com/WongKinYiu/yolor/releases/download/v1.0/' + file
31
+ print('Downloading %s to %s...' % (url, weights))
32
+ torch.hub.download_url_to_file(url, weights)
33
+ assert os.path.exists(weights) and os.path.getsize(weights) > 1E6 # check
34
+ except Exception as e: # download failed
35
+ print('ERROR: Download failure: %s' % e)
36
+ print(msg)
37
+
38
+
39
+ def attempt_load(weights, map_location=None):
40
+ # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
41
+ model = Ensemble()
42
+ for w in weights if isinstance(weights, list) else [weights]:
43
+ attempt_download(w)
44
+ model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model
45
+
46
+ if len(model) == 1:
47
+ return model[-1] # return model
48
+ else:
49
+ print('Ensemble created with %s\n' % weights)
50
+ for k in ['names', 'stride']:
51
+ setattr(model, k, getattr(model[-1], k))
52
+ return model # return ensemble
53
+
54
+
55
+ def gdrive_download(id='1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', name='coco128.zip'):
56
+ # Downloads a file from Google Drive. from utils.google_utils import *; gdrive_download()
57
+ t = time.time()
58
+
59
+ print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
60
+ os.remove(name) if os.path.exists(name) else None # remove existing
61
+ os.remove('cookie') if os.path.exists('cookie') else None
62
+
63
+ # Attempt file download
64
+ out = "NUL" if platform.system() == "Windows" else "/dev/null"
65
+ os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out))
66
+ if os.path.exists('cookie'): # large file
67
+ s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name)
68
+ else: # small file
69
+ s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id)
70
+ r = os.system(s) # execute, capture return
71
+ os.remove('cookie') if os.path.exists('cookie') else None
72
+
73
+ # Error check
74
+ if r != 0:
75
+ os.remove(name) if os.path.exists(name) else None # remove partial
76
+ print('Download error ') # raise Exception('Download error')
77
+ return r
78
+
79
+ # Unzip if archive
80
+ if name.endswith('.zip'):
81
+ print('unzipping... ', end='')
82
+ os.system('unzip -q %s' % name) # unzip
83
+ os.remove(name) # remove zip to free space
84
+
85
+ print('Done (%.1fs)' % (time.time() - t))
86
+ return r
87
+
88
+
89
+ def get_token(cookie="./cookie"):
90
+ with open(cookie) as f:
91
+ for line in f:
92
+ if "download" in line:
93
+ return line.split()[-1]
94
+ return ""
95
+
96
+ class Ensemble(nn.ModuleList):
97
+ # Ensemble of models
98
+ def __init__(self):
99
+ super().__init__()
100
+
101
+ def forward(self, x, augment=False, profile=False, visualize=False):
102
+ y = [module(x, augment, profile, visualize)[0] for module in self]
103
+ # y = torch.stack(y).max(0)[0] # max ensemble
104
+ # y = torch.stack(y).mean(0) # mean ensemble
105
+ y = torch.cat(y, 1) # nms ensemble
106
+ return y, None # inference, train output
107
+ # def upload_blob(bucket_name, source_file_name, destination_blob_name):
108
+ # # Uploads a file to a bucket
109
+ # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
110
+ #
111
+ # storage_client = storage.Client()
112
+ # bucket = storage_client.get_bucket(bucket_name)
113
+ # blob = bucket.blob(destination_blob_name)
114
+ #
115
+ # blob.upload_from_filename(source_file_name)
116
+ #
117
+ # print('File {} uploaded to {}.'.format(
118
+ # source_file_name,
119
+ # destination_blob_name))
120
+ #
121
+ #
122
+ # def download_blob(bucket_name, source_blob_name, destination_file_name):
123
+ # # Uploads a blob from a bucket
124
+ # storage_client = storage.Client()
125
+ # bucket = storage_client.get_bucket(bucket_name)
126
+ # blob = bucket.blob(source_blob_name)
127
+ #
128
+ # blob.download_to_filename(destination_file_name)
129
+ #
130
+ # print('Blob {} downloaded to {}.'.format(
131
+ # source_blob_name,
132
+ # destination_file_name))
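A short sketch (not from the commit) of how the Ensemble container above concatenates candidate boxes from several models along the box dimension before NMS. The toy detector and its output shape are invented for illustration; real usage goes through attempt_load.

    import torch
    import torch.nn as nn
    from asone.detectors.yolor.utils.google_utils import Ensemble

    class ToyDetector(nn.Module):
        # Stand-in detector: returns (inference_output, train_output) like the real models
        def __init__(self, n_boxes):
            super().__init__()
            self.n_boxes = n_boxes

        def forward(self, x, augment=False, profile=False, visualize=False):
            return torch.rand(x.shape[0], self.n_boxes, 6), None

    ens = Ensemble()
    ens.append(ToyDetector(10))
    ens.append(ToyDetector(20))
    y, _ = ens(torch.zeros(1, 3, 64, 64))
    print(y.shape)  # torch.Size([1, 30, 6]): per-model candidates concatenated
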
asone/detectors/yolor/utils/layers.py ADDED
@@ -0,0 +1,532 @@
1
+ from asone.detectors.yolor.utils.general import *
2
+
3
+ import torch
4
+ from torch import nn
5
+ import torch.nn.functional as F
6
+ try:
7
+ from mish_cuda import MishCuda as Mish
8
+
9
+ except:
10
+ class Mish(nn.Module): # https://github.com/digantamisra98/Mish
11
+ def forward(self, x):
12
+ return x * F.softplus(x).tanh()
13
+
14
+ try:
15
+ from pytorch_wavelets import DWTForward, DWTInverse
16
+
17
+ class DWT(nn.Module):
18
+ def __init__(self):
19
+ super(DWT, self).__init__()
20
+ self.xfm = DWTForward(J=1, wave='db1', mode='zero')
21
+
22
+ def forward(self, x):
23
+ b,c,w,h = x.shape
24
+ yl, yh = self.xfm(x)
25
+ return torch.cat([yl/2., yh[0].view(b,-1,w//2,h//2)/2.+.5], 1)
26
+
27
+ except: # using Reorg instead
28
+ class DWT(nn.Module):
29
+ def forward(self, x):
30
+ return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
31
+
32
+
33
+ class Reorg(nn.Module):
34
+ def forward(self, x):
35
+ return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
36
+
37
+
38
+ def make_divisible(v, divisor):
39
+ # Ensures all layers have a channel number that is divisible by the given divisor
40
+ # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
41
+ return math.ceil(v / divisor) * divisor
42
+
43
+
44
+ class Flatten(nn.Module):
45
+ # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
46
+ def forward(self, x):
47
+ return x.view(x.size(0), -1)
48
+
49
+
50
+ class Concat(nn.Module):
51
+ # Concatenate a list of tensors along dimension
52
+ def __init__(self, dimension=1):
53
+ super(Concat, self).__init__()
54
+ self.d = dimension
55
+
56
+ def forward(self, x):
57
+ return torch.cat(x, self.d)
58
+
59
+
60
+ class FeatureConcat(nn.Module):
61
+ def __init__(self, layers):
62
+ super(FeatureConcat, self).__init__()
63
+ self.layers = layers # layer indices
64
+ self.multiple = len(layers) > 1 # multiple layers flag
65
+
66
+ def forward(self, x, outputs):
67
+ return torch.cat([outputs[i] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]]
68
+
69
+
70
+ class FeatureConcat2(nn.Module):
71
+ def __init__(self, layers):
72
+ super(FeatureConcat2, self).__init__()
73
+ self.layers = layers # layer indices
74
+ self.multiple = len(layers) > 1 # multiple layers flag
75
+
76
+ def forward(self, x, outputs):
77
+ return torch.cat([outputs[self.layers[0]], outputs[self.layers[1]].detach()], 1)
78
+
79
+
80
+ class FeatureConcat3(nn.Module):
81
+ def __init__(self, layers):
82
+ super(FeatureConcat3, self).__init__()
83
+ self.layers = layers # layer indices
84
+ self.multiple = len(layers) > 1 # multiple layers flag
85
+
86
+ def forward(self, x, outputs):
87
+ return torch.cat([outputs[self.layers[0]], outputs[self.layers[1]].detach(), outputs[self.layers[2]].detach()], 1)
88
+
89
+
90
+ class FeatureConcat_l(nn.Module):
91
+ def __init__(self, layers):
92
+ super(FeatureConcat_l, self).__init__()
93
+ self.layers = layers # layer indices
94
+ self.multiple = len(layers) > 1 # multiple layers flag
95
+
96
+ def forward(self, x, outputs):
97
+ return torch.cat([outputs[i][:,:outputs[i].shape[1]//2,:,:] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]][:,:outputs[self.layers[0]].shape[1]//2,:,:]
98
+
99
+
100
+ class WeightedFeatureFusion(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
101
+ def __init__(self, layers, weight=False):
102
+ super(WeightedFeatureFusion, self).__init__()
103
+ self.layers = layers # layer indices
104
+ self.weight = weight # apply weights boolean
105
+ self.n = len(layers) + 1 # number of layers
106
+ if weight:
107
+ self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True) # layer weights
108
+
109
+ def forward(self, x, outputs):
110
+ # Weights
111
+ if self.weight:
112
+ w = torch.sigmoid(self.w) * (2 / self.n) # sigmoid weights (0-1)
113
+ x = x * w[0]
114
+
115
+ # Fusion
116
+ nx = x.shape[1] # input channels
117
+ for i in range(self.n - 1):
118
+ a = outputs[self.layers[i]] * w[i + 1] if self.weight else outputs[self.layers[i]] # feature to add
119
+ na = a.shape[1] # feature channels
120
+
121
+ # Adjust channels
122
+ if nx == na: # same shape
123
+ x = x + a
124
+ elif nx > na: # slice input
125
+ x[:, :na] = x[:, :na] + a # or a = nn.ZeroPad2d((0, 0, 0, 0, 0, dc))(a); x = x + a
126
+ else: # slice feature
127
+ x = x + a[:, :nx]
128
+
129
+ return x
130
+
131
+
132
+ class MixConv2d(nn.Module): # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595
133
+ def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'):
134
+ super(MixConv2d, self).__init__()
135
+
136
+ groups = len(k)
137
+ if method == 'equal_ch': # equal channels per group
138
+ i = torch.linspace(0, groups - 1E-6, out_ch).floor() # out_ch indices
139
+ ch = [(i == g).sum() for g in range(groups)]
140
+ else: # 'equal_params': equal parameter count per group
141
+ b = [out_ch] + [0] * groups
142
+ a = np.eye(groups + 1, groups, k=-1)
143
+ a -= np.roll(a, 1, axis=1)
144
+ a *= np.array(k) ** 2
145
+ a[0] = 1
146
+ ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int) # solve for equal weight indices, ax = b
147
+
148
+ self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch,
149
+ out_channels=ch[g],
150
+ kernel_size=k[g],
151
+ stride=stride,
152
+ padding=k[g] // 2, # 'same' pad
153
+ dilation=dilation,
154
+ bias=bias) for g in range(groups)])
155
+
156
+ def forward(self, x):
157
+ return torch.cat([m(x) for m in self.m], 1)
158
+
159
+
160
+ # Activation functions below -------------------------------------------------------------------------------------------
161
+ class SwishImplementation(torch.autograd.Function):
162
+ @staticmethod
163
+ def forward(ctx, x):
164
+ ctx.save_for_backward(x)
165
+ return x * torch.sigmoid(x)
166
+
167
+ @staticmethod
168
+ def backward(ctx, grad_output):
169
+ x = ctx.saved_tensors[0]
170
+ sx = torch.sigmoid(x) # sigmoid(ctx)
171
+ return grad_output * (sx * (1 + x * (1 - sx)))
172
+
173
+
174
+ class MishImplementation(torch.autograd.Function):
175
+ @staticmethod
176
+ def forward(ctx, x):
177
+ ctx.save_for_backward(x)
178
+ return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
179
+
180
+ @staticmethod
181
+ def backward(ctx, grad_output):
182
+ x = ctx.saved_tensors[0]
183
+ sx = torch.sigmoid(x)
184
+ fx = F.softplus(x).tanh()
185
+ return grad_output * (fx + x * sx * (1 - fx * fx))
186
+
187
+
188
+ class MemoryEfficientSwish(nn.Module):
189
+ def forward(self, x):
190
+ return SwishImplementation.apply(x)
191
+
192
+
193
+ class MemoryEfficientMish(nn.Module):
194
+ def forward(self, x):
195
+ return MishImplementation.apply(x)
196
+
197
+
198
+ class Swish(nn.Module):
199
+ def forward(self, x):
200
+ return x * torch.sigmoid(x)
201
+
202
+
203
+ class HardSwish(nn.Module): # https://arxiv.org/pdf/1905.02244.pdf
204
+ def forward(self, x):
205
+ return x * F.hardtanh(x + 3, 0., 6., True) / 6.
206
+
207
+
208
+ class DeformConv2d(nn.Module):
209
+ def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, bias=None, modulation=False):
210
+ """
211
+ Args:
212
+ modulation (bool, optional): If True, Modulated Deformable Convolution (Deformable ConvNets v2).
213
+ """
214
+ super(DeformConv2d, self).__init__()
215
+ self.kernel_size = kernel_size
216
+ self.padding = padding
217
+ self.stride = stride
218
+ self.zero_padding = nn.ZeroPad2d(padding)
219
+ self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias)
220
+
221
+ self.p_conv = nn.Conv2d(inc, 2*kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride)
222
+ nn.init.constant_(self.p_conv.weight, 0)
223
+ self.p_conv.register_backward_hook(self._set_lr)
224
+
225
+ self.modulation = modulation
226
+ if modulation:
227
+ self.m_conv = nn.Conv2d(inc, kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride)
228
+ nn.init.constant_(self.m_conv.weight, 0)
229
+ self.m_conv.register_backward_hook(self._set_lr)
230
+
231
+ @staticmethod
232
+ def _set_lr(module, grad_input, grad_output):
233
+ grad_input = (grad_input[i] * 0.1 for i in range(len(grad_input)))
234
+ grad_output = (grad_output[i] * 0.1 for i in range(len(grad_output)))
235
+
236
+ def forward(self, x):
237
+ offset = self.p_conv(x)
238
+ if self.modulation:
239
+ m = torch.sigmoid(self.m_conv(x))
240
+
241
+ dtype = offset.data.type()
242
+ ks = self.kernel_size
243
+ N = offset.size(1) // 2
244
+
245
+ if self.padding:
246
+ x = self.zero_padding(x)
247
+
248
+ # (b, 2N, h, w)
249
+ p = self._get_p(offset, dtype)
250
+
251
+ # (b, h, w, 2N)
252
+ p = p.contiguous().permute(0, 2, 3, 1)
253
+ q_lt = p.detach().floor()
254
+ q_rb = q_lt + 1
255
+
256
+ q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long()
257
+ q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long()
258
+ q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], dim=-1)
259
+ q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], dim=-1)
260
+
261
+ # clip p
262
+ p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1)
263
+
264
+ # bilinear kernel (b, h, w, N)
265
+ g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:]))
266
+ g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:]))
267
+ g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:]))
268
+ g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:]))
269
+
270
+ # (b, c, h, w, N)
271
+ x_q_lt = self._get_x_q(x, q_lt, N)
272
+ x_q_rb = self._get_x_q(x, q_rb, N)
273
+ x_q_lb = self._get_x_q(x, q_lb, N)
274
+ x_q_rt = self._get_x_q(x, q_rt, N)
275
+
276
+ # (b, c, h, w, N)
277
+ x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \
278
+ g_rb.unsqueeze(dim=1) * x_q_rb + \
279
+ g_lb.unsqueeze(dim=1) * x_q_lb + \
280
+ g_rt.unsqueeze(dim=1) * x_q_rt
281
+
282
+ # modulation
283
+ if self.modulation:
284
+ m = m.contiguous().permute(0, 2, 3, 1)
285
+ m = m.unsqueeze(dim=1)
286
+ m = torch.cat([m for _ in range(x_offset.size(1))], dim=1)
287
+ x_offset *= m
288
+
289
+ x_offset = self._reshape_x_offset(x_offset, ks)
290
+ out = self.conv(x_offset)
291
+
292
+ return out
293
+
294
+ def _get_p_n(self, N, dtype):
295
+ p_n_x, p_n_y = torch.meshgrid(
296
+ torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1),
297
+ torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1))
298
+ # (2N, 1)
299
+ p_n = torch.cat([torch.flatten(p_n_x), torch.flatten(p_n_y)], 0)
300
+ p_n = p_n.view(1, 2*N, 1, 1).type(dtype)
301
+
302
+ return p_n
303
+
304
+ def _get_p_0(self, h, w, N, dtype):
305
+ p_0_x, p_0_y = torch.meshgrid(
306
+ torch.arange(1, h*self.stride+1, self.stride),
307
+ torch.arange(1, w*self.stride+1, self.stride))
308
+ p_0_x = torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1)
309
+ p_0_y = torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1)
310
+ p_0 = torch.cat([p_0_x, p_0_y], 1).type(dtype)
311
+
312
+ return p_0
313
+
314
+ def _get_p(self, offset, dtype):
315
+ N, h, w = offset.size(1)//2, offset.size(2), offset.size(3)
316
+
317
+ # (1, 2N, 1, 1)
318
+ p_n = self._get_p_n(N, dtype)
319
+ # (1, 2N, h, w)
320
+ p_0 = self._get_p_0(h, w, N, dtype)
321
+ p = p_0 + p_n + offset
322
+ return p
323
+
324
+ def _get_x_q(self, x, q, N):
325
+ b, h, w, _ = q.size()
326
+ padded_w = x.size(3)
327
+ c = x.size(1)
328
+ # (b, c, h*w)
329
+ x = x.contiguous().view(b, c, -1)
330
+
331
+ # (b, h, w, N)
332
+ index = q[..., :N]*padded_w + q[..., N:] # offset_x*w + offset_y
333
+ # (b, c, h*w*N)
334
+ index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1)
335
+
336
+ x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N)
337
+
338
+ return x_offset
339
+
340
+ @staticmethod
341
+ def _reshape_x_offset(x_offset, ks):
342
+ b, c, h, w, N = x_offset.size()
343
+ x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1)
344
+ x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks)
345
+
346
+ return x_offset
347
+
348
+
349
+ class GAP(nn.Module):
350
+ def __init__(self):
351
+ super(GAP, self).__init__()
352
+ self.avg_pool = nn.AdaptiveAvgPool2d(1)
353
+ def forward(self, x):
354
+ #b, c, _, _ = x.size()
355
+ return self.avg_pool(x)#.view(b, c)
356
+
357
+
358
+ class Silence(nn.Module):
359
+ def __init__(self):
360
+ super(Silence, self).__init__()
361
+ def forward(self, x):
362
+ return x
363
+
364
+
365
+ class ScaleChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
366
+ def __init__(self, layers):
367
+ super(ScaleChannel, self).__init__()
368
+ self.layers = layers # layer indices
369
+
370
+ def forward(self, x, outputs):
371
+ a = outputs[self.layers[0]]
372
+ return x.expand_as(a) * a
373
+
374
+
375
+ class ShiftChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
376
+ def __init__(self, layers):
377
+ super(ShiftChannel, self).__init__()
378
+ self.layers = layers # layer indices
379
+
380
+ def forward(self, x, outputs):
381
+ a = outputs[self.layers[0]]
382
+ return a.expand_as(x) + x
383
+
384
+
385
+ class ShiftChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
386
+ def __init__(self, layers):
387
+ super(ShiftChannel2D, self).__init__()
388
+ self.layers = layers # layer indices
389
+
390
+ def forward(self, x, outputs):
391
+ a = outputs[self.layers[0]].view(1,-1,1,1)
392
+ return a.expand_as(x) + x
393
+
394
+
395
+ class ControlChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
396
+ def __init__(self, layers):
397
+ super(ControlChannel, self).__init__()
398
+ self.layers = layers # layer indices
399
+
400
+ def forward(self, x, outputs):
401
+ a = outputs[self.layers[0]]
402
+ return a.expand_as(x) * x
403
+
404
+
405
+ class ControlChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
406
+ def __init__(self, layers):
407
+ super(ControlChannel2D, self).__init__()
408
+ self.layers = layers # layer indices
409
+
410
+ def forward(self, x, outputs):
411
+ a = outputs[self.layers[0]].view(1,-1,1,1)
412
+ return a.expand_as(x) * x
413
+
414
+
415
+ class AlternateChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
416
+ def __init__(self, layers):
417
+ super(AlternateChannel, self).__init__()
418
+ self.layers = layers # layer indices
419
+
420
+ def forward(self, x, outputs):
421
+ a = outputs[self.layers[0]]
422
+ return torch.cat([a.expand_as(x), x], dim=1)
423
+
424
+
425
+ class AlternateChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
426
+ def __init__(self, layers):
427
+ super(AlternateChannel2D, self).__init__()
428
+ self.layers = layers # layer indices
429
+
430
+ def forward(self, x, outputs):
431
+ a = outputs[self.layers[0]].view(1,-1,1,1)
432
+ return torch.cat([a.expand_as(x), x], dim=1)
433
+
434
+
435
+ class SelectChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
436
+ def __init__(self, layers):
437
+ super(SelectChannel, self).__init__()
438
+ self.layers = layers # layer indices
439
+
440
+ def forward(self, x, outputs):
441
+ a = outputs[self.layers[0]]
442
+ return a.sigmoid().expand_as(x) * x
443
+
444
+
445
+ class SelectChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
446
+ def __init__(self, layers):
447
+ super(SelectChannel2D, self).__init__()
448
+ self.layers = layers # layer indices
449
+
450
+ def forward(self, x, outputs):
451
+ a = outputs[self.layers[0]].view(1,-1,1,1)
452
+ return a.sigmoid().expand_as(x) * x
453
+
454
+
455
+ class ScaleSpatial(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
456
+ def __init__(self, layers):
457
+ super(ScaleSpatial, self).__init__()
458
+ self.layers = layers # layer indices
459
+
460
+ def forward(self, x, outputs):
461
+ a = outputs[self.layers[0]]
462
+ return x * a
463
+
464
+
465
+ class ImplicitA(nn.Module):
466
+ def __init__(self, channel):
467
+ super(ImplicitA, self).__init__()
468
+ self.channel = channel
469
+ self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1))
470
+ nn.init.normal_(self.implicit, std=.02)
471
+
472
+ def forward(self):
473
+ return self.implicit
474
+
475
+
476
+ class ImplicitC(nn.Module):
477
+ def __init__(self, channel):
478
+ super(ImplicitC, self).__init__()
479
+ self.channel = channel
480
+ self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1))
481
+ nn.init.normal_(self.implicit, std=.02)
482
+
483
+ def forward(self):
484
+ return self.implicit
485
+
486
+
487
+ class ImplicitM(nn.Module):
488
+ def __init__(self, channel):
489
+ super(ImplicitM, self).__init__()
490
+ self.channel = channel
491
+ self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1))
492
+ nn.init.normal_(self.implicit, mean=1., std=.02)
493
+
494
+ def forward(self):
495
+ return self.implicit
496
+
497
+
498
+
499
+ class Implicit2DA(nn.Module):
500
+ def __init__(self, atom, channel):
501
+ super(Implicit2DA, self).__init__()
502
+ self.channel = channel
503
+ self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1))
504
+ nn.init.normal_(self.implicit, std=.02)
505
+
506
+ def forward(self):
507
+ return self.implicit
508
+
509
+
510
+ class Implicit2DC(nn.Module):
511
+ def __init__(self, atom, channel):
512
+ super(Implicit2DC, self).__init__()
513
+ self.channel = channel
514
+ self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1))
515
+ nn.init.normal_(self.implicit, std=.02)
516
+
517
+ def forward(self):
518
+ return self.implicit
519
+
520
+
521
+ class Implicit2DM(nn.Module):
522
+ def __init__(self, atom, channel):
523
+ super(Implicit2DM, self).__init__()
524
+ self.channel = channel
525
+ self.implicit = nn.Parameter(torch.ones(1, atom, channel, 1))
526
+ nn.init.normal_(self.implicit, mean=1., std=.02)
527
+
528
+ def forward(self):
529
+ return self.implicit
530
+
531
+
532
+
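A standalone sketch (not in the diff) of what the Reorg / DWT-fallback modules above do: a 2x2 space-to-depth rearrangement that quarters the spatial resolution and multiplies the channel count by four.

    import torch

    def reorg(x):
        # Same slicing as Reorg.forward above: gather each 2x2 neighbourhood into channels
        return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2],
                          x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)

    x = torch.arange(16.).view(1, 1, 4, 4)
    print(x.shape, '->', reorg(x).shape)  # torch.Size([1, 1, 4, 4]) -> torch.Size([1, 4, 2, 2])
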
asone/detectors/yolor/utils/loss.py ADDED
@@ -0,0 +1,173 @@
1
+ # Loss functions
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+
6
+ from asone.detectors.yolor.utils.general import bbox_iou
7
+ from asone.detectors.yolor.utils.torch_utils import is_parallel
8
+
9
+
10
+ def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
11
+ # return positive, negative label smoothing BCE targets
12
+ return 1.0 - 0.5 * eps, 0.5 * eps
13
+
14
+
15
+ class BCEBlurWithLogitsLoss(nn.Module):
16
+ # BCEWithLogitsLoss() with reduced missing-label effects.
17
+ def __init__(self, alpha=0.05):
18
+ super(BCEBlurWithLogitsLoss, self).__init__()
19
+ self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss()
20
+ self.alpha = alpha
21
+
22
+ def forward(self, pred, true):
23
+ loss = self.loss_fcn(pred, true)
24
+ pred = torch.sigmoid(pred) # prob from logits
25
+ dx = pred - true # reduce only missing label effects
26
+ # dx = (pred - true).abs() # reduce missing label and false label effects
27
+ alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4))
28
+ loss *= alpha_factor
29
+ return loss.mean()
30
+
31
+
32
+ class FocalLoss(nn.Module):
33
+ # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
34
+ def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
35
+ super(FocalLoss, self).__init__()
36
+ self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
37
+ self.gamma = gamma
38
+ self.alpha = alpha
39
+ self.reduction = loss_fcn.reduction
40
+ self.loss_fcn.reduction = 'none' # required to apply FL to each element
41
+
42
+ def forward(self, pred, true):
43
+ loss = self.loss_fcn(pred, true)
44
+ # p_t = torch.exp(-loss)
45
+ # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
46
+
47
+ # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
48
+ pred_prob = torch.sigmoid(pred) # prob from logits
49
+ p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
50
+ alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
51
+ modulating_factor = (1.0 - p_t) ** self.gamma
52
+ loss *= alpha_factor * modulating_factor
53
+
54
+ if self.reduction == 'mean':
55
+ return loss.mean()
56
+ elif self.reduction == 'sum':
57
+ return loss.sum()
58
+ else: # 'none'
59
+ return loss
60
+
61
+
62
+ def compute_loss(p, targets, model): # predictions, targets, model
63
+ device = targets.device
64
+ #print(device)
65
+ lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
66
+ tcls, tbox, indices, anchors = build_targets(p, targets, model) # targets
67
+ h = model.hyp # hyperparameters
68
+
69
+ # Define criteria
70
+ BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['cls_pw']])).to(device)
71
+ BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['obj_pw']])).to(device)
72
+
73
+ # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
74
+ cp, cn = smooth_BCE(eps=0.0)
75
+
76
+ # Focal loss
77
+ g = h['fl_gamma'] # focal loss gamma
78
+ if g > 0:
79
+ BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
80
+
81
+ # Losses
82
+ nt = 0 # number of targets
83
+ no = len(p) # number of outputs
84
+ balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6
85
+ balance = [4.0, 1.0, 0.5, 0.4, 0.1] if no == 5 else balance
86
+ for i, pi in enumerate(p): # layer index, layer predictions
87
+ b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
88
+ tobj = torch.zeros_like(pi[..., 0], device=device) # target obj
89
+
90
+ n = b.shape[0] # number of targets
91
+ if n:
92
+ nt += n # cumulative targets
93
+ ps = pi[b, a, gj, gi] # prediction subset corresponding to targets
94
+
95
+ # Regression
96
+ pxy = ps[:, :2].sigmoid() * 2. - 0.5
97
+ pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
98
+ pbox = torch.cat((pxy, pwh), 1).to(device) # predicted box
99
+ iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target)
100
+ lbox += (1.0 - iou).mean() # iou loss
101
+
102
+ # Objectness
103
+ tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio
104
+
105
+ # Classification
106
+ if model.nc > 1: # cls loss (only if multiple classes)
107
+ t = torch.full_like(ps[:, 5:], cn, device=device) # targets
108
+ t[range(n), tcls[i]] = cp
109
+ lcls += BCEcls(ps[:, 5:], t) # BCE
110
+
111
+ # Append targets to text file
112
+ # with open('targets.txt', 'a') as file:
113
+ # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
114
+
115
+ lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss
116
+
117
+ s = 3 / no # output count scaling
118
+ lbox *= h['box'] * s
119
+ lobj *= h['obj'] * s * (1.4 if no >= 4 else 1.)
120
+ lcls *= h['cls'] * s
121
+ bs = tobj.shape[0] # batch size
122
+
123
+ loss = lbox + lobj + lcls
124
+ return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()
125
+
126
+
127
+ def build_targets(p, targets, model):
128
+ nt = targets.shape[0] # number of anchors, targets
129
+ tcls, tbox, indices, anch = [], [], [], []
130
+ gain = torch.ones(6, device=targets.device) # normalized to gridspace gain
131
+ off = torch.tensor([[1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device).float() # overlap offsets
132
+
133
+ g = 0.5 # offset
134
+ multi_gpu = is_parallel(model)
135
+ for i, jj in enumerate(model.module.yolo_layers if multi_gpu else model.yolo_layers):
136
+ # get number of grid points and anchor vec for this yolo layer
137
+ anchors = model.module.module_list[jj].anchor_vec if multi_gpu else model.module_list[jj].anchor_vec
138
+ gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain
139
+
140
+ # Match targets to anchors
141
+ a, t, offsets = [], targets * gain, 0
142
+ if nt:
143
+ na = anchors.shape[0] # number of anchors
144
+ at = torch.arange(na).view(na, 1).repeat(1, nt) # anchor tensor, same as .repeat_interleave(nt)
145
+ r = t[None, :, 4:6] / anchors[:, None] # wh ratio
146
+ j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare
147
+ # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2))
148
+ a, t = at[j], t.repeat(na, 1, 1)[j] # filter
149
+
150
+ # overlaps
151
+ gxy = t[:, 2:4] # grid xy
152
+ z = torch.zeros_like(gxy)
153
+ j, k = ((gxy % 1. < g) & (gxy > 1.)).T
154
+ l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T
155
+ a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat((t, t[j], t[k], t[l], t[m]), 0)
156
+ offsets = torch.cat((z, z[j] + off[0], z[k] + off[1], z[l] + off[2], z[m] + off[3]), 0) * g
157
+
158
+ # Define
159
+ b, c = t[:, :2].long().T # image, class
160
+ gxy = t[:, 2:4] # grid xy
161
+ gwh = t[:, 4:6] # grid wh
162
+ gij = (gxy - offsets).long()
163
+ gi, gj = gij.T # grid xy indices
164
+
165
+ # Append
166
+ #indices.append((b, a, gj, gi)) # image, anchor, grid indices
167
+ indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices
168
+ tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
169
+ anch.append(anchors[a]) # anchors
170
+ tcls.append(c) # class
171
+
172
+ return tcls, tbox, indices, anch
173
+
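A standalone numerical sketch (not in the diff) of the focal-loss modulation that FocalLoss above applies on top of BCEWithLogitsLoss (gamma=1.5, alpha=0.25): well-classified examples are down-weighted far more than hard ones. The logits below are arbitrary.

    import torch
    import torch.nn as nn

    bce = nn.BCEWithLogitsLoss(reduction='none')
    gamma, alpha = 1.5, 0.25

    pred = torch.tensor([4.0, -4.0])   # easy positive vs. hard positive (logits)
    true = torch.tensor([1.0, 1.0])

    loss = bce(pred, true)
    p = torch.sigmoid(pred)
    p_t = true * p + (1 - true) * (1 - p)                  # probability of the true class
    alpha_factor = true * alpha + (1 - true) * (1 - alpha)
    focal = loss * alpha_factor * (1.0 - p_t) ** gamma     # same formula as FocalLoss.forward

    print(loss)   # plain BCE: ~[0.018, 4.018]
    print(focal)  # focal: easy example cut by orders of magnitude, hard one only by ~alpha
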
asone/detectors/yolor/utils/metrics.py ADDED
@@ -0,0 +1,140 @@
1
+ # Model validation metrics
2
+
3
+ import matplotlib.pyplot as plt
4
+ import numpy as np
5
+
6
+
7
+ def fitness(x):
8
+ # Model fitness as a weighted combination of metrics
9
+ w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
10
+ return (x[:, :4] * w).sum(1)
11
+
12
+
13
+ def fitness_p(x):
14
+ # Model fitness as a weighted combination of metrics
15
+ w = [1.0, 0.0, 0.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
16
+ return (x[:, :4] * w).sum(1)
17
+
18
+
19
+ def fitness_r(x):
20
+ # Model fitness as a weighted combination of metrics
21
+ w = [0.0, 1.0, 0.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
22
+ return (x[:, :4] * w).sum(1)
23
+
24
+
25
+ def fitness_ap50(x):
26
+ # Model fitness as a weighted combination of metrics
27
+ w = [0.0, 0.0, 1.0, 0.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
28
+ return (x[:, :4] * w).sum(1)
29
+
30
+
31
+ def fitness_ap(x):
32
+ # Model fitness as a weighted combination of metrics
33
+ w = [0.0, 0.0, 0.0, 1.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
34
+ return (x[:, :4] * w).sum(1)
35
+
36
+
37
+ def fitness_f(x):
38
+ # Model fitness as a weighted combination of metrics
39
+ #w = [0.0, 0.0, 0.0, 1.0] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
40
+ return ((x[:, 0]*x[:, 1])/(x[:, 0]+x[:, 1]))
41
+
42
+
43
+ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, fname='precision-recall_curve.png'):
44
+ """ Compute the average precision, given the recall and precision curves.
45
+ Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
46
+ # Arguments
47
+ tp: True positives (nparray, nx1 or nx10).
48
+ conf: Objectness value from 0-1 (nparray).
49
+ pred_cls: Predicted object classes (nparray).
50
+ target_cls: True object classes (nparray).
51
+ plot: Plot precision-recall curve at mAP@0.5
52
+ fname: Plot filename
53
+ # Returns
54
+ The average precision as computed in py-faster-rcnn.
55
+ """
56
+
57
+ # Sort by objectness
58
+ i = np.argsort(-conf)
59
+ tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
60
+
61
+ # Find unique classes
62
+ unique_classes = np.unique(target_cls)
63
+
64
+ # Create Precision-Recall curve and compute AP for each class
65
+ px, py = np.linspace(0, 1, 1000), [] # for plotting
66
+ pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
67
+ s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
68
+ ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s)
69
+ for ci, c in enumerate(unique_classes):
70
+ i = pred_cls == c
71
+ n_l = (target_cls == c).sum() # number of labels
72
+ n_p = i.sum() # number of predictions
73
+
74
+ if n_p == 0 or n_l == 0:
75
+ continue
76
+ else:
77
+ # Accumulate FPs and TPs
78
+ fpc = (1 - tp[i]).cumsum(0)
79
+ tpc = tp[i].cumsum(0)
80
+
81
+ # Recall
82
+ recall = tpc / (n_l + 1e-16) # recall curve
83
+ r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases
84
+
85
+ # Precision
86
+ precision = tpc / (tpc + fpc) # precision curve
87
+ p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score
88
+
89
+ # AP from recall-precision curve
90
+ for j in range(tp.shape[1]):
91
+ ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
92
+ if j == 0:
93
+ py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
94
+
95
+ # Compute F1 score (harmonic mean of precision and recall)
96
+ f1 = 2 * p * r / (p + r + 1e-16)
97
+
98
+ if plot:
99
+ py = np.stack(py, axis=1)
100
+ fig, ax = plt.subplots(1, 1, figsize=(5, 5))
101
+ ax.plot(px, py, linewidth=0.5, color='grey') # plot(recall, precision)
102
+ ax.plot(px, py.mean(1), linewidth=2, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
103
+ ax.set_xlabel('Recall')
104
+ ax.set_ylabel('Precision')
105
+ ax.set_xlim(0, 1)
106
+ ax.set_ylim(0, 1)
107
+ plt.legend()
108
+ fig.tight_layout()
109
+ fig.savefig(fname, dpi=200)
110
+
111
+ return p, r, ap, f1, unique_classes.astype('int32')
112
+
113
+
114
+ def compute_ap(recall, precision):
115
+ """ Compute the average precision, given the recall and precision curves.
116
+ Source: https://github.com/rbgirshick/py-faster-rcnn.
117
+ # Arguments
118
+ recall: The recall curve (list).
119
+ precision: The precision curve (list).
120
+ # Returns
121
+ The average precision as computed in py-faster-rcnn.
122
+ """
123
+
124
+ # Append sentinel values to beginning and end
125
+ mrec = np.concatenate(([0.0], recall, [1.0]))
126
+ mpre = np.concatenate(([1.0], precision, [0.0]))
127
+
128
+ # Compute the precision envelope
129
+ mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
130
+
131
+ # Integrate area under curve
132
+ method = 'interp' # methods: 'continuous', 'interp'
133
+ if method == 'interp':
134
+ x = np.linspace(0, 1, 101) # 101-point interp (COCO)
135
+ ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
136
+ else: # 'continuous'
137
+ i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
138
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
139
+
140
+ return ap, mpre, mrec
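A quick sketch (not part of the diff) of the two entry points above, assuming the asone package is importable: compute_ap integrates the precision envelope over a toy PR curve, and fitness() scores a row of [P, R, mAP@0.5, mAP@0.5:0.95] with the 0.1/0.9 weighting.

    import numpy as np
    from asone.detectors.yolor.utils.metrics import compute_ap, fitness

    recall    = np.array([0.0, 0.25, 0.5, 0.75, 1.0])   # toy, monotonically increasing
    precision = np.array([1.0, 0.9, 0.8, 0.6, 0.5])
    ap, mpre, mrec = compute_ap(recall, precision)
    print('AP =', round(float(ap), 3))

    # One row of [P, R, mAP@0.5, mAP@0.5:0.95] -> 0.1 * 0.55 + 0.9 * 0.35
    print(fitness(np.array([[0.70, 0.60, 0.55, 0.35]])))
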
asone/detectors/yolor/utils/parse_config.py ADDED
@@ -0,0 +1,71 @@
1
+ import os
2
+
3
+ import numpy as np
4
+
5
+
6
+ def parse_model_cfg(path):
7
+ # Parse the yolo *.cfg file and return module definitions path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3'
8
+ if not path.endswith('.cfg'): # add .cfg suffix if omitted
9
+ path += '.cfg'
10
+ if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path): # add cfg/ prefix if omitted
11
+ path = 'cfg' + os.sep + path
12
+
13
+ with open(path, 'r') as f:
14
+ lines = f.read().split('\n')
15
+ lines = [x for x in lines if x and not x.startswith('#')]
16
+ lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
17
+ mdefs = [] # module definitions
18
+ for line in lines:
19
+ if line.startswith('['): # This marks the start of a new block
20
+ mdefs.append({})
21
+ mdefs[-1]['type'] = line[1:-1].rstrip()
22
+ if mdefs[-1]['type'] == 'convolutional':
23
+ mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later)
24
+
25
+ else:
26
+ key, val = line.split("=")
27
+ key = key.rstrip()
28
+
29
+ if key == 'anchors': # return nparray
30
+ mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors
31
+ elif (key in ['from', 'layers', 'mask']) or (key == 'size' and ',' in val): # return array
32
+ mdefs[-1][key] = [int(x) for x in val.split(',')]
33
+ else:
34
+ val = val.strip()
35
+ if val.isnumeric(): # return int or float
36
+ mdefs[-1][key] = int(val) if (int(val) - float(val)) == 0 else float(val)
37
+ else:
38
+ mdefs[-1][key] = val # return string
39
+
40
+ # Check all fields are supported
41
+ supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups',
42
+ 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random',
43
+ 'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind',
44
+ 'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh', 'atoms', 'na', 'nc']
45
+
46
+ f = [] # fields
47
+ for x in mdefs[1:]:
48
+ [f.append(k) for k in x if k not in f]
49
+ u = [x for x in f if x not in supported] # unsupported fields
50
+ assert not any(u), "Unsupported fields %s in %s. See https://github.com/ultralytics/yolov3/issues/631" % (u, path)
51
+
52
+ return mdefs
53
+
54
+
55
+ def parse_data_cfg(path):
56
+ # Parses the data configuration file
57
+ if not os.path.exists(path) and os.path.exists('data' + os.sep + path): # add data/ prefix if omitted
58
+ path = 'data' + os.sep + path
59
+
60
+ with open(path, 'r') as f:
61
+ lines = f.readlines()
62
+
63
+ options = dict()
64
+ for line in lines:
65
+ line = line.strip()
66
+ if line == '' or line.startswith('#'):
67
+ continue
68
+ key, val = line.split('=')
69
+ options[key.strip()] = val.strip()
70
+
71
+ return options
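A minimal example (not part of the diff) of the Darknet-style syntax that parse_model_cfg above accepts; the temporary file name toy.cfg is illustrative.

    from asone.detectors.yolor.utils.parse_config import parse_model_cfg

    cfg_lines = [
        "[net]", "width=640", "height=640", "",
        "[convolutional]", "batch_normalize=1", "filters=32",
        "size=3", "stride=1", "activation=mish",
    ]
    with open('toy.cfg', 'w') as f:
        f.write('\n'.join(cfg_lines))

    mdefs = parse_model_cfg('toy.cfg')          # list of dicts, one per [block]
    print(mdefs[0]['type'], mdefs[1]['filters'])  # net 32
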
asone/detectors/yolor/utils/plots.py ADDED
@@ -0,0 +1,380 @@
1
+ # Plotting utils
2
+
3
+ import glob
4
+ import math
5
+ import os
6
+ import random
7
+ from copy import copy
8
+ from pathlib import Path
9
+
10
+ import cv2
11
+ import matplotlib
12
+ import matplotlib.pyplot as plt
13
+ import numpy as np
14
+ import torch
15
+ import yaml
16
+ from PIL import Image
17
+ from scipy.signal import butter, filtfilt
18
+
19
+ from asone.detectors.yolor.utils.general import xywh2xyxy, xyxy2xywh
20
+ from asone.detectors.yolor.utils.metrics import fitness
21
+
22
+ # Settings
23
+ matplotlib.use('Agg') # for writing to files only
24
+
25
+
26
+ def color_list():
27
+ # Return first 10 plt colors as (r,g,b) https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb
28
+ def hex2rgb(h):
29
+ return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
30
+
31
+ return [hex2rgb(h) for h in plt.rcParams['axes.prop_cycle'].by_key()['color']]
32
+
33
+
34
+ def hist2d(x, y, n=100):
35
+ # 2d histogram used in labels.png and evolve.png
36
+ xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n)
37
+ hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges))
38
+ xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1)
39
+ yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1)
40
+ return np.log(hist[xidx, yidx])
41
+
42
+
43
+ def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
44
+ # https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy
45
+ def butter_lowpass(cutoff, fs, order):
46
+ nyq = 0.5 * fs
47
+ normal_cutoff = cutoff / nyq
48
+ return butter(order, normal_cutoff, btype='low', analog=False)
49
+
50
+ b, a = butter_lowpass(cutoff, fs, order=order)
51
+ return filtfilt(b, a, data) # forward-backward filter
52
+
53
+
54
+ def plot_one_box(x, img, color=None, label=None, line_thickness=None):
55
+ # Plots one bounding box on image img
56
+ tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness
57
+ color = color or [random.randint(0, 255) for _ in range(3)]
58
+ c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
59
+ cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
60
+ if label:
61
+ tf = max(tl - 1, 1) # font thickness
62
+ t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
63
+ c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
64
+ cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled
65
+ cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
66
+
67
+
68
+ def plot_wh_methods(): # from utils.general import *; plot_wh_methods()
69
+ # Compares the two methods for width-height anchor multiplication
70
+ # https://github.com/ultralytics/yolov3/issues/168
71
+ x = np.arange(-4.0, 4.0, .1)
72
+ ya = np.exp(x)
73
+ yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2
74
+
75
+ fig = plt.figure(figsize=(6, 3), dpi=150)
76
+ plt.plot(x, ya, '.-', label='YOLO')
77
+ plt.plot(x, yb ** 2, '.-', label='YOLO ^2')
78
+ plt.plot(x, yb ** 1.6, '.-', label='YOLO ^1.6')
79
+ plt.xlim(left=-4, right=4)
80
+ plt.ylim(bottom=0, top=6)
81
+ plt.xlabel('input')
82
+ plt.ylabel('output')
83
+ plt.grid()
84
+ plt.legend()
85
+ fig.tight_layout()
86
+ fig.savefig('comparison.png', dpi=200)
87
+
88
+
89
+ def output_to_target(output, width, height):
90
+ # Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
91
+ if isinstance(output, torch.Tensor):
92
+ output = output.cpu().numpy()
93
+
94
+ targets = []
95
+ for i, o in enumerate(output):
96
+ if o is not None:
97
+ for pred in o:
98
+ box = pred[:4]
99
+ w = (box[2] - box[0]) / width
100
+ h = (box[3] - box[1]) / height
101
+ x = box[0] / width + w / 2
102
+ y = box[1] / height + h / 2
103
+ conf = pred[4]
104
+ cls = int(pred[5])
105
+
106
+ targets.append([i, cls, x, y, w, h, conf])
107
+
108
+ return np.array(targets)
109
+
110
+
111
+ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
112
+ # Plot image grid with labels
113
+
114
+ if isinstance(images, torch.Tensor):
115
+ images = images.cpu().float().numpy()
116
+ if isinstance(targets, torch.Tensor):
117
+ targets = targets.cpu().numpy()
118
+
119
+ # un-normalise
120
+ if np.max(images[0]) <= 1:
121
+ images *= 255
122
+
123
+ tl = 3 # line thickness
124
+ tf = max(tl - 1, 1) # font thickness
125
+ bs, _, h, w = images.shape # batch size, _, height, width
126
+ bs = min(bs, max_subplots) # limit plot images
127
+ ns = np.ceil(bs ** 0.5) # number of subplots (square)
128
+
129
+ # Check if we should resize
130
+ scale_factor = max_size / max(h, w)
131
+ if scale_factor < 1:
132
+ h = math.ceil(scale_factor * h)
133
+ w = math.ceil(scale_factor * w)
134
+
135
+ colors = color_list() # list of colors
136
+ mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init
137
+ for i, img in enumerate(images):
138
+ if i == max_subplots: # if last batch has fewer images than we expect
139
+ break
140
+
141
+ block_x = int(w * (i // ns))
142
+ block_y = int(h * (i % ns))
143
+
144
+ img = img.transpose(1, 2, 0)
145
+ if scale_factor < 1:
146
+ img = cv2.resize(img, (w, h))
147
+
148
+ mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
149
+ if len(targets) > 0:
150
+ image_targets = targets[targets[:, 0] == i]
151
+ boxes = xywh2xyxy(image_targets[:, 2:6]).T
152
+ classes = image_targets[:, 1].astype('int')
153
+ labels = image_targets.shape[1] == 6 # labels if no conf column
154
+ conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred)
155
+
156
+ boxes[[0, 2]] *= w
157
+ boxes[[0, 2]] += block_x
158
+ boxes[[1, 3]] *= h
159
+ boxes[[1, 3]] += block_y
160
+ for j, box in enumerate(boxes.T):
161
+ cls = int(classes[j])
162
+ color = colors[cls % len(colors)]
163
+ cls = names[cls] if names else cls
164
+ if labels or conf[j] > 0.25: # 0.25 conf thresh
165
+ label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j])
166
+ plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)
167
+
168
+ # Draw image filename labels
169
+ if paths:
170
+ label = Path(paths[i]).name[:40] # trim to 40 char
171
+ t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
172
+ cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf,
173
+ lineType=cv2.LINE_AA)
174
+
175
+ # Image border
176
+ cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)
177
+
178
+ if fname:
179
+ r = min(1280. / max(h, w) / ns, 1.0) # ratio to limit image size
180
+ mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA)
181
+ # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save
182
+ Image.fromarray(mosaic).save(fname) # PIL save
183
+ return mosaic
184
+
185
+
186
+ def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
187
+ # Plot LR simulating training for full epochs
188
+ optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals
189
+ y = []
190
+ for _ in range(epochs):
191
+ scheduler.step()
192
+ y.append(optimizer.param_groups[0]['lr'])
193
+ plt.plot(y, '.-', label='LR')
194
+ plt.xlabel('epoch')
195
+ plt.ylabel('LR')
196
+ plt.grid()
197
+ plt.xlim(0, epochs)
198
+ plt.ylim(0)
199
+ plt.tight_layout()
200
+ plt.savefig(Path(save_dir) / 'LR.png', dpi=200)
201
+
202
+
203
+ def plot_test_txt(): # from utils.general import *; plot_test()
204
+ # Plot test.txt histograms
205
+ x = np.loadtxt('test.txt', dtype=np.float32)
206
+ box = xyxy2xywh(x[:, :4])
207
+ cx, cy = box[:, 0], box[:, 1]
208
+
209
+ fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True)
210
+ ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0)
211
+ ax.set_aspect('equal')
212
+ plt.savefig('hist2d.png', dpi=300)
213
+
214
+ fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True)
215
+ ax[0].hist(cx, bins=600)
216
+ ax[1].hist(cy, bins=600)
217
+ plt.savefig('hist1d.png', dpi=200)
218
+
219
+
220
+ def plot_targets_txt(): # from utils.general import *; plot_targets_txt()
221
+ # Plot targets.txt histograms
222
+ x = np.loadtxt('targets.txt', dtype=np.float32).T
223
+ s = ['x targets', 'y targets', 'width targets', 'height targets']
224
+ fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)
225
+ ax = ax.ravel()
226
+ for i in range(4):
227
+ ax[i].hist(x[i], bins=100, label='%.3g +/- %.3g' % (x[i].mean(), x[i].std()))
228
+ ax[i].legend()
229
+ ax[i].set_title(s[i])
230
+ plt.savefig('targets.jpg', dpi=200)
231
+
232
+
233
+ def plot_study_txt(f='study.txt', x=None): # from utils.general import *; plot_study_txt()
234
+ # Plot study.txt generated by test.py
235
+ fig, ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)
236
+ ax = ax.ravel()
237
+
238
+ fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True)
239
+ for f in ['study/study_coco_yolo%s.txt' % x for x in ['s', 'm', 'l', 'x']]:
240
+ y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T
241
+ x = np.arange(y.shape[1]) if x is None else np.array(x)
242
+ s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_inference (ms/img)', 't_NMS (ms/img)', 't_total (ms/img)']
243
+ for i in range(7):
244
+ ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8)
245
+ ax[i].set_title(s[i])
246
+
247
+ j = y[3].argmax() + 1
248
+ ax2.plot(y[6, :j], y[3, :j] * 1E2, '.-', linewidth=2, markersize=8,
249
+ label=Path(f).stem.replace('study_coco_', '').replace('yolo', 'YOLO'))
250
+
251
+ ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5],
252
+ 'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet')
253
+
254
+ ax2.grid()
255
+ ax2.set_xlim(0, 30)
256
+ ax2.set_ylim(28, 50)
257
+ ax2.set_yticks(np.arange(30, 55, 5))
258
+ ax2.set_xlabel('GPU Speed (ms/img)')
259
+ ax2.set_ylabel('COCO AP val')
260
+ ax2.legend(loc='lower right')
261
+ plt.savefig('study_mAP_latency.png', dpi=300)
262
+ plt.savefig(f.replace('.txt', '.png'), dpi=300)
263
+
264
+
265
+ def plot_labels(labels, save_dir=''):
266
+ # plot dataset labels
267
+ c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes
268
+ nc = int(c.max() + 1) # number of classes
269
+
270
+ fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)
271
+ ax = ax.ravel()
272
+ ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
273
+ ax[0].set_xlabel('classes')
274
+ ax[1].scatter(b[0], b[1], c=hist2d(b[0], b[1], 90), cmap='jet')
275
+ ax[1].set_xlabel('x')
276
+ ax[1].set_ylabel('y')
277
+ ax[2].scatter(b[2], b[3], c=hist2d(b[2], b[3], 90), cmap='jet')
278
+ ax[2].set_xlabel('width')
279
+ ax[2].set_ylabel('height')
280
+ plt.savefig(Path(save_dir) / 'labels.png', dpi=200)
281
+ plt.close()
282
+
283
+ # seaborn correlogram
284
+ try:
285
+ import seaborn as sns
286
+ import pandas as pd
287
+ x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])
288
+ sns.pairplot(x, corner=True, diag_kind='hist', kind='scatter', markers='o',
289
+ plot_kws=dict(s=3, edgecolor=None, linewidth=1, alpha=0.02),
290
+ diag_kws=dict(bins=50))
291
+ plt.savefig(Path(save_dir) / 'labels_correlogram.png', dpi=200)
292
+ plt.close()
293
+ except Exception as e:
294
+ pass
295
+
296
+
297
+ def plot_evolution(yaml_file='data/hyp.finetune.yaml'): # from utils.general import *; plot_evolution()
298
+ # Plot hyperparameter evolution results in evolve.txt
299
+ with open(yaml_file) as f:
300
+ hyp = yaml.load(f, Loader=yaml.FullLoader)
301
+ x = np.loadtxt('evolve.txt', ndmin=2)
302
+ f = fitness(x)
303
+ # weights = (f - f.min()) ** 2 # for weighted results
304
+ plt.figure(figsize=(10, 12), tight_layout=True)
305
+ matplotlib.rc('font', **{'size': 8})
306
+ for i, (k, v) in enumerate(hyp.items()):
307
+ y = x[:, i + 7]
308
+ # mu = (y * weights).sum() / weights.sum() # best weighted result
309
+ mu = y[f.argmax()] # best single result
310
+ plt.subplot(6, 5, i + 1)
311
+ plt.scatter(y, f, c=hist2d(y, f, 20), cmap='viridis', alpha=.8, edgecolors='none')
312
+ plt.plot(mu, f.max(), 'k+', markersize=15)
313
+ plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9}) # limit to 40 characters
314
+ if i % 5 != 0:
315
+ plt.yticks([])
316
+ print('%15s: %.3g' % (k, mu))
317
+ plt.savefig('evolve.png', dpi=200)
318
+ print('\nPlot saved as evolve.png')
319
+
320
+
321
+ def plot_results_overlay(start=0, stop=0): # from utils.general import *; plot_results_overlay()
322
+ # Plot training 'results*.txt', overlaying train and val losses
323
+ s = ['train', 'train', 'train', 'Precision', 'mAP@0.5', 'val', 'val', 'val', 'Recall', 'mAP@0.5:0.95'] # legends
324
+ t = ['Box', 'Objectness', 'Classification', 'P-R', 'mAP-F1'] # titles
325
+ for f in sorted(glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt')):
326
+ results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T
327
+ n = results.shape[1] # number of rows
328
+ x = range(start, min(stop, n) if stop else n)
329
+ fig, ax = plt.subplots(1, 5, figsize=(14, 3.5), tight_layout=True)
330
+ ax = ax.ravel()
331
+ for i in range(5):
332
+ for j in [i, i + 5]:
333
+ y = results[j, x]
334
+ ax[i].plot(x, y, marker='.', label=s[j])
335
+ # y_smooth = butter_lowpass_filtfilt(y)
336
+ # ax[i].plot(x, np.gradient(y_smooth), marker='.', label=s[j])
337
+
338
+ ax[i].set_title(t[i])
339
+ ax[i].legend()
340
+ ax[i].set_ylabel(f) if i == 0 else None # add filename
341
+ fig.savefig(f.replace('.txt', '.png'), dpi=200)
342
+
343
+
344
+ def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''):
345
+ # from utils.general import *; plot_results(save_dir='runs/train/exp0')
346
+ # Plot training 'results*.txt'
347
+ fig, ax = plt.subplots(2, 5, figsize=(12, 6))
348
+ ax = ax.ravel()
349
+ s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall',
350
+ 'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95']
351
+ if bucket:
352
+ # os.system('rm -rf storage.googleapis.com')
353
+ # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id]
354
+ files = ['%g.txt' % x for x in id]
355
+ c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/%g.txt' % (bucket, x) for x in id)
356
+ os.system(c)
357
+ else:
358
+ files = glob.glob(str(Path(save_dir) / '*.txt')) + glob.glob('../../Downloads/results*.txt')
359
+ assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir)
360
+ for fi, f in enumerate(files):
361
+ try:
362
+ results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T
363
+ n = results.shape[1] # number of rows
364
+ x = range(start, min(stop, n) if stop else n)
365
+ for i in range(10):
366
+ y = results[i, x]
367
+ if i in [0, 1, 2, 5, 6, 7]:
368
+ y[y == 0] = np.nan # don't show zero loss values
369
+ # y /= y[0] # normalize
370
+ label = labels[fi] if len(labels) else Path(f).stem
371
+ ax[i].plot(x, y, marker='.', label=label, linewidth=1, markersize=6)
372
+ ax[i].set_title(s[i])
373
+ # if i in [5, 6, 7]: # share train and val loss y axes
374
+ # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
375
+ except Exception as e:
376
+ print('Warning: Plotting error for %s; %s' % (f, e))
377
+
378
+ fig.tight_layout()
379
+ ax[1].legend()
380
+ fig.savefig(Path(save_dir) / 'results.png', dpi=200)
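A minimal usage sketch for the plotting helpers above (assuming they live in asone.detectors.yolor.utils.plots as the file layout suggests; the random labels array is purely illustrative, not part of this commit):
import numpy as np
from asone.detectors.yolor.utils.plots import plot_labels  # assumed module path

# plot_labels expects an (n, 5) array of [class, x_center, y_center, width, height] rows
labels = np.concatenate([np.random.randint(0, 3, (200, 1)),   # fake class ids 0..2
                         np.random.rand(200, 4)], axis=1)     # fake normalized boxes
plot_labels(labels, save_dir='.')  # writes labels.png (and labels_correlogram.png if seaborn is installed)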
asone/detectors/yolor/utils/torch_utils.py ADDED
@@ -0,0 +1,240 @@
1
+ # PyTorch utils
2
+
3
+ import logging
4
+ import math
5
+ import os
6
+ import time
7
+ from contextlib import contextmanager
8
+ from copy import deepcopy
9
+
10
+ import torch
11
+ import torch.backends.cudnn as cudnn
12
+ import torch.nn as nn
13
+ import torch.nn.functional as F
14
+ import torchvision
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ @contextmanager
20
+ def torch_distributed_zero_first(local_rank: int):
21
+ """
22
+ Context manager to make all processes in distributed training wait for the local master to do something.
23
+ """
24
+ if local_rank not in [-1, 0]:
25
+ torch.distributed.barrier()
26
+ yield
27
+ if local_rank == 0:
28
+ torch.distributed.barrier()
29
+
30
+
31
+ def init_torch_seeds(seed=0):
32
+ # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html
33
+ torch.manual_seed(seed)
34
+ if seed == 0: # slower, more reproducible
35
+ cudnn.deterministic = True
36
+ cudnn.benchmark = False
37
+ else: # faster, less reproducible
38
+ cudnn.deterministic = False
39
+ cudnn.benchmark = True
40
+
41
+
42
+ def select_device(device='', batch_size=None):
43
+ # device = 'cpu' or '0' or '0,1,2,3'
44
+ cpu_request = device.lower() == 'cpu'
45
+ if device and not cpu_request: # if device requested other than 'cpu'
46
+ os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
47
+ assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availability
48
+
49
+ cuda = False if cpu_request else torch.cuda.is_available()
50
+ if cuda:
51
+ c = 1024 ** 2 # bytes to MB
52
+ ng = torch.cuda.device_count()
53
+ if ng > 1 and batch_size: # check that batch_size is compatible with device_count
54
+ assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng)
55
+ x = [torch.cuda.get_device_properties(i) for i in range(ng)]
56
+ s = f'Using torch {torch.__version__} '
57
+ for i in range(0, ng):
58
+ if i == 1:
59
+ s = ' ' * len(s)
60
+ logger.info("%sCUDA:%g (%s, %dMB)" % (s, i, x[i].name, x[i].total_memory / c))
61
+ else:
62
+ logger.info(f'Using torch {torch.__version__} CPU')
63
+
64
+ logger.info('') # skip a line
65
+ return torch.device('cuda:0' if cuda else 'cpu')
66
+
67
+
68
+ def time_synchronized():
69
+ torch.cuda.synchronize() if torch.cuda.is_available() else None
70
+ return time.time()
71
+
72
+
73
+ def is_parallel(model):
74
+ return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
75
+
76
+
77
+ def intersect_dicts(da, db, exclude=()):
78
+ # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
79
+ return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
80
+
81
+
82
+ def initialize_weights(model):
83
+ for m in model.modules():
84
+ t = type(m)
85
+ if t is nn.Conv2d:
86
+ pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
87
+ elif t is nn.BatchNorm2d:
88
+ m.eps = 1e-3
89
+ m.momentum = 0.03
90
+ elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
91
+ m.inplace = True
92
+
93
+
94
+ def find_modules(model, mclass=nn.Conv2d):
95
+ # Finds layer indices matching module class 'mclass'
96
+ return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
97
+
98
+
99
+ def sparsity(model):
100
+ # Return global model sparsity
101
+ a, b = 0., 0.
102
+ for p in model.parameters():
103
+ a += p.numel()
104
+ b += (p == 0).sum()
105
+ return b / a
106
+
107
+
108
+ def prune(model, amount=0.3):
109
+ # Prune model to requested global sparsity
110
+ import torch.nn.utils.prune as prune
111
+ print('Pruning model... ', end='')
112
+ for name, m in model.named_modules():
113
+ if isinstance(m, nn.Conv2d):
114
+ prune.l1_unstructured(m, name='weight', amount=amount) # prune
115
+ prune.remove(m, 'weight') # make permanent
116
+ print(' %.3g global sparsity' % sparsity(model))
117
+
118
+
119
+ def fuse_conv_and_bn(conv, bn):
120
+ # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
121
+ fusedconv = nn.Conv2d(conv.in_channels,
122
+ conv.out_channels,
123
+ kernel_size=conv.kernel_size,
124
+ stride=conv.stride,
125
+ padding=conv.padding,
126
+ groups=conv.groups,
127
+ bias=True).requires_grad_(False).to(conv.weight.device)
128
+
129
+ # prepare filters
130
+ w_conv = conv.weight.clone().view(conv.out_channels, -1)
131
+ w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
132
+ fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
133
+
134
+ # prepare spatial bias
135
+ b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
136
+ b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
137
+ fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
138
+
139
+ return fusedconv
140
+
141
+
142
+ def model_info(model, verbose=False, img_size=640):
143
+ # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
144
+ n_p = sum(x.numel() for x in model.parameters()) # number parameters
145
+ n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
146
+ if verbose:
147
+ print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
148
+ for i, (name, p) in enumerate(model.named_parameters()):
149
+ name = name.replace('module_list.', '')
150
+ print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
151
+ (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
152
+
153
+ try: # FLOPS
154
+ from thop import profile
155
+ flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, img_size, img_size),), verbose=False)[0] / 1E9 * 2
156
+ img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float
157
+ fs = ', %.9f GFLOPS' % (flops) # 640x640 FLOPS
158
+ except (ImportError, Exception):
159
+ fs = ''
160
+
161
+ logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
162
+
163
+
164
+ def load_classifier(name='resnet101', n=2):
165
+ # Loads a pretrained model reshaped to n-class output
166
+ model = torchvision.models.__dict__[name](pretrained=True)
167
+
168
+ # ResNet model properties
169
+ # input_size = [3, 224, 224]
170
+ # input_space = 'RGB'
171
+ # input_range = [0, 1]
172
+ # mean = [0.485, 0.456, 0.406]
173
+ # std = [0.229, 0.224, 0.225]
174
+
175
+ # Reshape output to n classes
176
+ filters = model.fc.weight.shape[1]
177
+ model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
178
+ model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
179
+ model.fc.out_features = n
180
+ return model
181
+
182
+
183
+ def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio
184
+ # scales img(bs,3,y,x) by ratio
185
+ if ratio == 1.0:
186
+ return img
187
+ else:
188
+ h, w = img.shape[2:]
189
+ s = (int(h * ratio), int(w * ratio)) # new size
190
+ img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize
191
+ if not same_shape: # pad/crop img
192
+ gs = 32 # (pixels) grid size
193
+ h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
194
+ return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean
195
+
196
+
197
+ def copy_attr(a, b, include=(), exclude=()):
198
+ # Copy attributes from b to a, options to only include [...] and to exclude [...]
199
+ for k, v in b.__dict__.items():
200
+ if (len(include) and k not in include) or k.startswith('_') or k in exclude:
201
+ continue
202
+ else:
203
+ setattr(a, k, v)
204
+
205
+
206
+ class ModelEMA:
207
+ """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
208
+ Keep a moving average of everything in the model state_dict (parameters and buffers).
209
+ This is intended to allow functionality like
210
+ https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
211
+ A smoothed version of the weights is necessary for some training schemes to perform well.
212
+ This class is sensitive where it is initialized in the sequence of model init,
213
+ GPU assignment and distributed training wrappers.
214
+ """
215
+
216
+ def __init__(self, model, decay=0.9999, updates=0):
217
+ # Create EMA
218
+ self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA
219
+ # if next(model.parameters()).device.type != 'cpu':
220
+ # self.ema.half() # FP16 EMA
221
+ self.updates = updates # number of EMA updates
222
+ self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs)
223
+ for p in self.ema.parameters():
224
+ p.requires_grad_(False)
225
+
226
+ def update(self, model):
227
+ # Update EMA parameters
228
+ with torch.no_grad():
229
+ self.updates += 1
230
+ d = self.decay(self.updates)
231
+
232
+ msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict
233
+ for k, v in self.ema.state_dict().items():
234
+ if v.dtype.is_floating_point:
235
+ v *= d
236
+ v += (1. - d) * msd[k].detach()
237
+
238
+ def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
239
+ # Update EMA attributes
240
+ copy_attr(self.ema, model, include, exclude)
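A minimal sketch of how these helpers are typically wired around a training loop (the toy model and the single dummy step are assumptions for illustration, not part of this commit):
import torch
import torch.nn as nn
from asone.detectors.yolor.utils.torch_utils import select_device, model_info, ModelEMA

device = select_device('')      # '' picks CUDA:0 if available, otherwise CPU
model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.BatchNorm2d(8), nn.ReLU()).to(device)  # toy model
model_info(model, verbose=False, img_size=64)   # logs layer/parameter counts (FLOPS only if thop is installed)

ema = ModelEMA(model)           # keeps an exponential moving average of the weights
out = model(torch.zeros(1, 3, 64, 64, device=device))
out.mean().backward()           # stand-in for a real loss; in practice optimizer.step() runs here
ema.update(model)               # call once per optimizer step; ema.ema holds the smoothed model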
asone/detectors/yolor/utils/yolor_utils.py ADDED
@@ -0,0 +1,206 @@
1
+ import torch
2
+ import torchvision
3
+ import time
4
+ import numpy as np
5
+ import cv2
6
+
7
+
8
+ class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
9
+
10
+ # Create a list of colors for each class where each color is a tuple of 3 integer values
11
+ rng = np.random.default_rng(3)
12
+ colors = rng.uniform(0, 255, size=(len(class_names), 3))
13
+
14
+ def box_area(box):
15
+ # box = xyxy(4,n)
16
+ return (box[2] - box[0]) * (box[3] - box[1])
17
+
18
+
19
+ def box_iou(box1, box2, eps=1e-7):
20
+ # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
21
+ """
22
+ Return intersection-over-union (Jaccard index) of boxes.
23
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
24
+ Arguments:
25
+ box1 (Tensor[N, 4])
26
+ box2 (Tensor[M, 4])
27
+ Returns:
28
+ iou (Tensor[N, M]): the NxM matrix containing the pairwise
29
+ IoU values for every element in boxes1 and boxes2
30
+ """
31
+
32
+ # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
33
+ (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1)
34
+ inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
35
+
36
+ # IoU = inter / (area1 + area2 - inter)
37
+ return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps)
38
+
39
+ def xywh2xyxy(x):
40
+ # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
41
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
42
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
43
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
44
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
45
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
46
+ return y
47
+
48
+ def non_max_suppression(prediction,
49
+ conf_thres=0.25,
50
+ iou_thres=0.45,
51
+ classes=None,
52
+ agnostic=False,
53
+ multi_label=False,
54
+ labels=(),
55
+ max_det=300):
56
+ """Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes
57
+ Returns:
58
+ list of detections, on (n,6) tensor per image [xyxy, conf, cls]
59
+ """
60
+ # prediction = torch.Tensor(prediction)
61
+ bs = prediction.shape[0] # batch size
62
+ nc = prediction.shape[2] - 5 # number of classes
63
+ xc = prediction[..., 4] > conf_thres # candidates
64
+ # Checks
65
+ assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
66
+ assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
67
+
68
+ # Settings
69
+ # min_wh = 2 # (pixels) minimum box width and height
70
+ max_wh = 7680 # (pixels) maximum box width and height
71
+ max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
72
+ time_limit = 0.3 + 0.03 * bs # seconds to quit after
73
+ redundant = True # require redundant detections
74
+ multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
75
+ merge = False # use merge-NMS
76
+
77
+ t = time.time()
78
+ output = [torch.zeros((0, 6), device=prediction.device)] * bs
79
+ for xi, x in enumerate(prediction): # image index, image inference
80
+ # Apply constraints
81
+ # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
82
+ x = x[xc[xi]] # confidence
83
+
84
+ # Cat apriori labels if autolabelling
85
+ if labels and len(labels[xi]):
86
+ lb = labels[xi]
87
+ v = torch.zeros((len(lb), nc + 5), device=x.device)
88
+ v[:, :4] = lb[:, 1:5] # box
89
+ v[:, 4] = 1.0 # conf
90
+ v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls
91
+ x = torch.cat((x, v), 0)
92
+
93
+ # If none remain process next image
94
+ if not x.shape[0]:
95
+ continue
96
+
97
+ # Compute conf
98
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
99
+
100
+ # Box (center x, center y, width, height) to (x1, y1, x2, y2)
101
+ # print(type(x))
102
+ box = xywh2xyxy(x[:, :4])
103
+
104
+ # Detections matrix nx6 (xyxy, conf, cls)
105
+ if multi_label:
106
+ i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
107
+ x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
108
+ else: # best class only
109
+ conf, j = x[:, 5:].max(1, keepdim=True)
110
+ x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
111
+
112
+ # Filter by class
113
+ if classes is not None:
114
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
115
+
116
+ # Apply finite constraint
117
+ # if not torch.isfinite(x).all():
118
+ # x = x[torch.isfinite(x).all(1)]
119
+
120
+ # Check shape
121
+ n = x.shape[0] # number of boxes
122
+ if not n: # no boxes
123
+ continue
124
+ elif n > max_nms: # excess boxes
125
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
126
+
127
+ # Batched NMS
128
+ c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
129
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
130
+ i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
131
+ if i.shape[0] > max_det: # limit detections
132
+ i = i[:max_det]
133
+ if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
134
+ # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
135
+ iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
136
+ weights = iou * scores[None] # box weights
137
+ x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
138
+ if redundant:
139
+ i = i[iou.sum(1) > 1] # require redundancy
140
+
141
+ output[xi] = x[i]
142
+ if (time.time() - t) > time_limit:
143
+ # LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded')
144
+ break # time limit exceeded
145
+
146
+ return output
147
+
148
+ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
149
+ # Resize and pad image while meeting stride-multiple constraints
150
+ shape = im.shape[:2] # current shape [height, width]
151
+ if isinstance(new_shape, int):
152
+ new_shape = (new_shape, new_shape)
153
+
154
+ # Scale ratio (new / old)
155
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
156
+ if not scaleup: # only scale down, do not scale up (for better val mAP)
157
+ r = min(r, 1.0)
158
+
159
+ # Compute padding
160
+ ratio = r, r # width, height ratios
161
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
162
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
163
+ if auto: # minimum rectangle
164
+ dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
165
+ elif scaleFill: # stretch
166
+ dw, dh = 0.0, 0.0
167
+ new_unpad = (new_shape[1], new_shape[0])
168
+ ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
169
+
170
+ dw /= 2 # divide padding into 2 sides
171
+ dh /= 2
172
+
173
+ if shape[::-1] != new_unpad: # resize
174
+ im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
175
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
176
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
177
+ im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
178
+ return im, ratio, (dw, dh)
179
+
180
+ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
181
+ # Rescale coords (xyxy) from img1_shape to img0_shape
182
+ if ratio_pad is None: # calculate from img0_shape
183
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
184
+ pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
185
+ else:
186
+ gain = ratio_pad[0][0]
187
+ pad = ratio_pad[1]
188
+
189
+ coords[:, [0, 2]] -= pad[0] # x padding
190
+ coords[:, [1, 3]] -= pad[1] # y padding
191
+ coords[:, :4] /= gain
192
+ clip_coords(coords, img0_shape)
193
+ return coords
194
+
195
+
196
+ def clip_coords(boxes, shape):
197
+ # Clip xyxy bounding boxes to image shape (height, width)
198
+ if isinstance(boxes, torch.Tensor): # faster individually
199
+ boxes[:, 0].clamp_(0, shape[1]) # x1
200
+ boxes[:, 1].clamp_(0, shape[0]) # y1
201
+ boxes[:, 2].clamp_(0, shape[1]) # x2
202
+ boxes[:, 3].clamp_(0, shape[0]) # y2
203
+ else: # np.array (faster grouped)
204
+ boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2
205
+ boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
206
+
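The functions above form the usual pre/post-processing chain: letterbox the frame, run the network, apply non_max_suppression, then map boxes back to the original image with scale_coords. A minimal sketch with a dummy frame and dummy raw predictions standing in for real model output (shapes follow the (batch, boxes, 5 + num_classes) layout the NMS code expects):
import numpy as np
import torch
from asone.detectors.yolor.utils.yolor_utils import letterbox, non_max_suppression, scale_coords

frame = np.zeros((480, 640, 3), dtype=np.uint8)                        # dummy BGR frame
padded, ratio, (dw, dh) = letterbox(frame, new_shape=640, auto=False)  # resize + pad to 640x640

raw = torch.rand(1, 100, 85)        # fake output: [cx, cy, w, h, obj, 80 class scores] per box
raw[..., :4] *= 640                 # put the fake box coordinates in pixel space
dets = non_max_suppression(raw, conf_thres=0.25, iou_thres=0.45)[0]    # (n, 6): xyxy, conf, cls

if len(dets):
    dets[:, :4] = scale_coords(padded.shape[:2], dets[:, :4], frame.shape).round()  # back to frame size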
asone/detectors/yolor/yolor_detector.py ADDED
@@ -0,0 +1,138 @@
1
+
2
+ import os
3
+ from asone.utils import get_names
4
+ import numpy as np
5
+ import warnings
6
+ import torch
7
+ import onnxruntime
8
+
9
+ from .models.models import *
10
+ from asone import utils
11
+ from asone.detectors.yolor.utils.yolor_utils import (non_max_suppression,
12
+ scale_coords,
13
+ letterbox)
14
+
15
+
16
+ class YOLOrDetector:
17
+ def __init__(self,
18
+ weights=None,
19
+ cfg=None,
20
+ use_onnx=True,
21
+ use_cuda=True,
22
+ ):
23
+
24
+ self.use_onnx = use_onnx
25
+ self.device = 'cuda' if use_cuda else 'cpu'
26
+
27
+ if not os.path.exists(weights):
28
+ utils.download_weights(weights)
29
+
30
+ if cfg is None:
31
+ cfg = os.path.join("cfg", "yolor_p6.cfg")
32
+ # If weights is a list of paths, select the path at the first index
33
+ weights = str(weights[0] if isinstance(weights, list) else weights)
34
+ # Load Model
35
+ self.model = self.load_model(use_cuda, weights, cfg=cfg, img_size=640)
36
+
37
+ def load_model(self, use_cuda, weights, cfg, img_size, fp16=False):
38
+ # Half precision is only used when fp16=True and the model runs on GPU
39
+ self.fp16 = fp16 and (
40
+ self.device != 'cpu')
41
+ # Load onnx
42
+ if self.use_onnx:
43
+ if use_cuda:
44
+ providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
45
+ else:
46
+ providers = ['CPUExecutionProvider']
47
+ model = onnxruntime.InferenceSession(weights, providers=providers)
48
+ # Load Pytorch
49
+ else:
50
+ model = Darknet(cfg, img_size).to(self.device)
51
+ model.load_state_dict(torch.load(
52
+ weights, map_location=self.device)['model'])
53
+ model.to(self.device).eval()
54
+ model.half() if self.fp16 else model.float()
55
+ return model
56
+
57
+ def image_preprocessing(self,
58
+ image: list,
59
+ input_shape=(640, 640)) -> list:
60
+
61
+ original_image = image.copy()
62
+ image = letterbox(image, input_shape, stride=32, auto=False)[0]
63
+ image = image.transpose((2, 0, 1))[::-1]
64
+ image = np.ascontiguousarray(image, dtype=np.float32)
65
+ image /= 255 # 0 - 255 to 0.0 - 1.0
66
+ if len(image.shape) == 3:
67
+ image = image[None] # expand for batch dim
68
+ return original_image, image
69
+
70
+ def detect(self, image: list,
71
+ input_shape: tuple = (640, 640),
72
+ conf_thres: float = 0.25,
73
+ iou_thres: float = 0.45,
74
+ max_det: int = 1000,
75
+ filter_classes: list = None,
76
+ agnostic_nms: bool = True,
77
+ with_p6: bool = False) -> list:
78
+
79
+ # Image Preprocessing
80
+ original_image, processed_image = self.image_preprocessing(
81
+ image, input_shape)
82
+
83
+ # Inference
84
+ if self.use_onnx:
85
+ # Input names of ONNX model on which it is exported
86
+ input_name = self.model.get_inputs()[0].name
87
+ # Run onnx model
88
+ pred = self.model.run([self.model.get_outputs()[0].name], {
89
+ input_name: processed_image})[0]
90
+ # Run Pytorch model
91
+ else:
92
+ processed_image = torch.from_numpy(processed_image).to(self.device)
93
+ # Change image floating point precision if fp16 set to true
94
+ processed_image = processed_image.half() if self.fp16 else processed_image.float()
95
+ pred = self.model(processed_image, augment=False)[0]
96
+ pred = pred.detach().cpu().numpy()
97
+
98
+ if isinstance(pred, np.ndarray):
99
+ pred = torch.tensor(pred, device=self.device)
100
+ predictions = non_max_suppression(
101
+ pred, conf_thres,
102
+ iou_thres,
103
+ agnostic=agnostic_nms,
104
+ max_det=max_det)
105
+
106
+ for i, prediction in enumerate(predictions): # per image
107
+ if len(prediction):
108
+ prediction[:, :4] = scale_coords(
109
+ processed_image.shape[2:], prediction[:, :4], original_image.shape).round()
110
+ predictions[i] = prediction
111
+
112
+ predictions = predictions[0].cpu().numpy()
113
+ image_info = {
114
+ 'width': original_image.shape[1],
115
+ 'height': original_image.shape[0],
116
+ }
117
+
118
+ self.boxes = predictions[:, :4]
119
+ self.scores = predictions[:, 4:5]
120
+ self.class_ids = predictions[:, 5:6]
121
+
122
+ if filter_classes:
123
+ class_names = get_names()
124
+
125
+ filter_class_idx = []
126
+ if filter_classes:
127
+ for _class in filter_classes:
128
+ if _class.lower() in class_names:
129
+ filter_class_idx.append(
130
+ class_names.index(_class.lower()))
131
+ else:
132
+ warnings.warn(
133
+ f"class {_class} not found in model classes list.")
134
+
135
+ predictions = predictions[np.in1d(
136
+ predictions[:, 5].astype(int), filter_class_idx)]
137
+
138
+ return predictions, image_info
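End to end, the detector added above is constructed with a weights file and queried per frame. A minimal sketch (the weights path and image path are placeholders, and ONNX inference on CPU is assumed here):
import cv2
from asone.detectors.yolor.yolor_detector import YOLOrDetector

detector = YOLOrDetector(weights='yolor_csp.onnx', use_onnx=True, use_cuda=False)  # placeholder weights path
frame = cv2.imread('sample.jpg')   # placeholder image, BGR as loaded by OpenCV

dets, info = detector.detect(frame, conf_thres=0.25, iou_thres=0.45, filter_classes=['person'])
# dets is an (n, 6) array of [x1, y1, x2, y2, confidence, class_id] scaled to the original frame
print(info, dets.shape)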
asone/detectors/yolov5/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .yolov5_detector import YOLOv5Detector
2
+ __all__ = ['YOLOv5Detector']
asone/detectors/yolov5/yolov5/__init__.py ADDED
File without changes
asone/detectors/yolov5/yolov5/models/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ import os
2
+ import sys
3
+ sys.path.append(os.path.dirname(__file__))
asone/detectors/yolov5/yolov5/models/common.py ADDED
@@ -0,0 +1,756 @@
1
+ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
+ """
3
+ Common modules
4
+ """
5
+
6
+ import json
7
+ import math
8
+ import platform
9
+ import warnings
10
+ from collections import OrderedDict, namedtuple
11
+ from copy import copy
12
+ from pathlib import Path
13
+
14
+ import cv2
15
+ import numpy as np
16
+ import pandas as pd
17
+ import requests
18
+ import torch
19
+ import torch.nn as nn
20
+ import yaml
21
+ from PIL import Image
22
+ from torch.cuda import amp
23
+
24
+ from asone.detectors.yolov5.yolov5.models.general import (LOGGER, check_requirements,
25
+ check_suffix, check_version,
26
+ colorstr, increment_path)
27
+
28
+ def autopad(k, p=None): # kernel, padding
29
+ # Pad to 'same'
30
+ if p is None:
31
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
32
+ return p
33
+
34
+
35
+ class Conv(nn.Module):
36
+ # Standard convolution
37
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
38
+ super().__init__()
39
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
40
+ self.bn = nn.BatchNorm2d(c2)
41
+ self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
42
+
43
+ def forward(self, x):
44
+ return self.act(self.bn(self.conv(x)))
45
+
46
+ def forward_fuse(self, x):
47
+ return self.act(self.conv(x))
48
+
49
+
50
+ class DWConv(Conv):
51
+ # Depth-wise convolution class
52
+ def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
53
+ super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
54
+
55
+
56
+ class DWConvTranspose2d(nn.ConvTranspose2d):
57
+ # Depth-wise transpose convolution class
58
+ def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
59
+ super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
60
+
61
+
62
+ class TransformerLayer(nn.Module):
63
+ # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
64
+ def __init__(self, c, num_heads):
65
+ super().__init__()
66
+ self.q = nn.Linear(c, c, bias=False)
67
+ self.k = nn.Linear(c, c, bias=False)
68
+ self.v = nn.Linear(c, c, bias=False)
69
+ self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
70
+ self.fc1 = nn.Linear(c, c, bias=False)
71
+ self.fc2 = nn.Linear(c, c, bias=False)
72
+
73
+ def forward(self, x):
74
+ x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
75
+ x = self.fc2(self.fc1(x)) + x
76
+ return x
77
+
78
+
79
+ class TransformerBlock(nn.Module):
80
+ # Vision Transformer https://arxiv.org/abs/2010.11929
81
+ def __init__(self, c1, c2, num_heads, num_layers):
82
+ super().__init__()
83
+ self.conv = None
84
+ if c1 != c2:
85
+ self.conv = Conv(c1, c2)
86
+ self.linear = nn.Linear(c2, c2) # learnable position embedding
87
+ self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
88
+ self.c2 = c2
89
+
90
+ def forward(self, x):
91
+ if self.conv is not None:
92
+ x = self.conv(x)
93
+ b, _, w, h = x.shape
94
+ p = x.flatten(2).permute(2, 0, 1)
95
+ return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
96
+
97
+
98
+ class Bottleneck(nn.Module):
99
+ # Standard bottleneck
100
+ def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
101
+ super().__init__()
102
+ c_ = int(c2 * e) # hidden channels
103
+ self.cv1 = Conv(c1, c_, 1, 1)
104
+ self.cv2 = Conv(c_, c2, 3, 1, g=g)
105
+ self.add = shortcut and c1 == c2
106
+
107
+ def forward(self, x):
108
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
109
+
110
+
111
+ class BottleneckCSP(nn.Module):
112
+ # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
113
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
114
+ super().__init__()
115
+ c_ = int(c2 * e) # hidden channels
116
+ self.cv1 = Conv(c1, c_, 1, 1)
117
+ self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
118
+ self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
119
+ self.cv4 = Conv(2 * c_, c2, 1, 1)
120
+ self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
121
+ self.act = nn.SiLU()
122
+ self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
123
+
124
+ def forward(self, x):
125
+ y1 = self.cv3(self.m(self.cv1(x)))
126
+ y2 = self.cv2(x)
127
+ return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
128
+
129
+
130
+ class CrossConv(nn.Module):
131
+ # Cross Convolution Downsample
132
+ def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
133
+ # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
134
+ super().__init__()
135
+ c_ = int(c2 * e) # hidden channels
136
+ self.cv1 = Conv(c1, c_, (1, k), (1, s))
137
+ self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
138
+ self.add = shortcut and c1 == c2
139
+
140
+ def forward(self, x):
141
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
142
+
143
+
144
+ class C3(nn.Module):
145
+ # CSP Bottleneck with 3 convolutions
146
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
147
+ super().__init__()
148
+ c_ = int(c2 * e) # hidden channels
149
+ self.cv1 = Conv(c1, c_, 1, 1)
150
+ self.cv2 = Conv(c1, c_, 1, 1)
151
+ self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
152
+ self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
153
+
154
+ def forward(self, x):
155
+ return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
156
+
157
+
158
+ class C3x(C3):
159
+ # C3 module with cross-convolutions
160
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
161
+ super().__init__(c1, c2, n, shortcut, g, e)
162
+ c_ = int(c2 * e)
163
+ self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
164
+
165
+
166
+ class C3TR(C3):
167
+ # C3 module with TransformerBlock()
168
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
169
+ super().__init__(c1, c2, n, shortcut, g, e)
170
+ c_ = int(c2 * e)
171
+ self.m = TransformerBlock(c_, c_, 4, n)
172
+
173
+
174
+ class C3SPP(C3):
175
+ # C3 module with SPP()
176
+ def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
177
+ super().__init__(c1, c2, n, shortcut, g, e)
178
+ c_ = int(c2 * e)
179
+ self.m = SPP(c_, c_, k)
180
+
181
+
182
+ class C3Ghost(C3):
183
+ # C3 module with GhostBottleneck()
184
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
185
+ super().__init__(c1, c2, n, shortcut, g, e)
186
+ c_ = int(c2 * e) # hidden channels
187
+ self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
188
+
189
+
190
+ class SPP(nn.Module):
191
+ # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
192
+ def __init__(self, c1, c2, k=(5, 9, 13)):
193
+ super().__init__()
194
+ c_ = c1 // 2 # hidden channels
195
+ self.cv1 = Conv(c1, c_, 1, 1)
196
+ self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
197
+ self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
198
+
199
+ def forward(self, x):
200
+ x = self.cv1(x)
201
+ with warnings.catch_warnings():
202
+ warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
203
+ return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
204
+
205
+
206
+ class SPPF(nn.Module):
207
+ # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
208
+ def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
209
+ super().__init__()
210
+ c_ = c1 // 2 # hidden channels
211
+ self.cv1 = Conv(c1, c_, 1, 1)
212
+ self.cv2 = Conv(c_ * 4, c2, 1, 1)
213
+ self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
214
+
215
+ def forward(self, x):
216
+ x = self.cv1(x)
217
+ with warnings.catch_warnings():
218
+ warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
219
+ y1 = self.m(x)
220
+ y2 = self.m(y1)
221
+ return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
222
+
223
+
224
+ class Focus(nn.Module):
225
+ # Focus wh information into c-space
226
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
227
+ super().__init__()
228
+ self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
229
+ # self.contract = Contract(gain=2)
230
+
231
+ def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
232
+ return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
233
+ # return self.conv(self.contract(x))
234
+
235
+
236
+ class GhostConv(nn.Module):
237
+ # Ghost Convolution https://github.com/huawei-noah/ghostnet
238
+ def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
239
+ super().__init__()
240
+ c_ = c2 // 2 # hidden channels
241
+ self.cv1 = Conv(c1, c_, k, s, None, g, act)
242
+ self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
243
+
244
+ def forward(self, x):
245
+ y = self.cv1(x)
246
+ return torch.cat((y, self.cv2(y)), 1)
247
+
248
+
249
+ class GhostBottleneck(nn.Module):
250
+ # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
251
+ def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
252
+ super().__init__()
253
+ c_ = c2 // 2
254
+ self.conv = nn.Sequential(
255
+ GhostConv(c1, c_, 1, 1), # pw
256
+ DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
257
+ GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
258
+ self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
259
+ act=False)) if s == 2 else nn.Identity()
260
+
261
+ def forward(self, x):
262
+ return self.conv(x) + self.shortcut(x)
263
+
264
+
265
+ class Contract(nn.Module):
266
+ # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
267
+ def __init__(self, gain=2):
268
+ super().__init__()
269
+ self.gain = gain
270
+
271
+ def forward(self, x):
272
+ b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
273
+ s = self.gain
274
+ x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
275
+ x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
276
+ return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
277
+
278
+
279
+ class Expand(nn.Module):
280
+ # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
281
+ def __init__(self, gain=2):
282
+ super().__init__()
283
+ self.gain = gain
284
+
285
+ def forward(self, x):
286
+ b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
287
+ s = self.gain
288
+ x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
289
+ x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
290
+ return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
291
+
292
+
293
+ class Concat(nn.Module):
294
+ # Concatenate a list of tensors along dimension
295
+ def __init__(self, dimension=1):
296
+ super().__init__()
297
+ self.d = dimension
298
+
299
+ def forward(self, x):
300
+ return torch.cat(x, self.d)
301
+
302
+
303
+ class DetectMultiBackend(nn.Module):
304
+ # YOLOv5 MultiBackend class for python inference on various backends
305
+ def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
306
+ # Usage:
307
+ # PyTorch: weights = *.pt
308
+ # TorchScript: *.torchscript
309
+ # ONNX Runtime: *.onnx
310
+ # ONNX OpenCV DNN: *.onnx with --dnn
311
+ # OpenVINO: *.xml
312
+ # CoreML: *.mlmodel
313
+ # TensorRT: *.engine
314
+ # TensorFlow SavedModel: *_saved_model
315
+ # TensorFlow GraphDef: *.pb
316
+ # TensorFlow Lite: *.tflite
317
+ # TensorFlow Edge TPU: *_edgetpu.tflite
318
+ from asone.detectors.yolov5.utils.experimental import attempt_download, attempt_load # scoped to avoid circular import
319
+
320
+ super().__init__()
321
+ w = str(weights[0] if isinstance(weights, list) else weights)
322
+ pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend
323
+ w = attempt_download(w) # download if not local
324
+ fp16 &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16
325
+ stride, names = 32, [f'class{i}' for i in range(1000)] # assign defaults
326
+ if data: # assign class names (optional)
327
+ with open(data, errors='ignore') as f:
328
+ names = yaml.safe_load(f)['names']
329
+
330
+ if pt: # PyTorch
331
+ model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
332
+ stride = max(int(model.stride.max()), 32) # model stride
333
+ names = model.module.names if hasattr(model, 'module') else model.names # get class names
334
+ model.half() if fp16 else model.float()
335
+ self.model = model # explicitly assign for to(), cpu(), cuda(), half()
336
+ elif jit: # TorchScript
337
+ LOGGER.info(f'Loading {w} for TorchScript inference...')
338
+ extra_files = {'config.txt': ''} # model metadata
339
+ model = torch.jit.load(w, _extra_files=extra_files)
340
+ model.half() if fp16 else model.float()
341
+ if extra_files['config.txt']:
342
+ d = json.loads(extra_files['config.txt']) # extra_files dict
343
+ stride, names = int(d['stride']), d['names']
344
+ elif dnn: # ONNX OpenCV DNN
345
+ LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
346
+ check_requirements(('opencv-python>=4.5.4',))
347
+ net = cv2.dnn.readNetFromONNX(w)
348
+ elif onnx: # ONNX Runtime
349
+ LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
350
+ cuda = torch.cuda.is_available()
351
+ check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
352
+ import onnxruntime
353
+ providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
354
+ session = onnxruntime.InferenceSession(w, providers=providers)
355
+ meta = session.get_modelmeta().custom_metadata_map # metadata
356
+ if 'stride' in meta:
357
+ stride, names = int(meta['stride']), eval(meta['names'])
358
+ elif xml: # OpenVINO
359
+ LOGGER.info(f'Loading {w} for OpenVINO inference...')
360
+ check_requirements(('openvino',)) # requires openvino-dev: https://pypi.org/project/openvino-dev/
361
+ import openvino
362
+ from openvino.runtime import Core, Layout, get_batch
363
+ ie = Core()
364
+ if not Path(w).is_file(): # if not *.xml
365
+ w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir
366
+ network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
367
+ if network.get_parameters()[0].get_layout().empty:
368
+ network.get_parameters()[0].set_layout(Layout("NCHW"))
369
+ batch_dim = get_batch(network)
370
+ if batch_dim.is_static:
371
+ batch_size = batch_dim.get_length()
372
+ executable_network = ie.compile_model(network, device_name="CPU") # device_name="MYRIAD" for Intel NCS2
373
+ output_layer = next(iter(executable_network.outputs))
374
+ meta = Path(w).with_suffix('.yaml')
375
+ if meta.exists():
376
+ stride, names = self._load_metadata(meta) # load metadata
377
+ elif engine: # TensorRT
378
+ LOGGER.info(f'Loading {w} for TensorRT inference...')
379
+ import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
380
+ check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
381
+ Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
382
+ logger = trt.Logger(trt.Logger.INFO)
383
+ with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
384
+ model = runtime.deserialize_cuda_engine(f.read())
385
+ context = model.create_execution_context()
386
+ bindings = OrderedDict()
387
+ fp16 = False # default updated below
388
+ dynamic_input = False
389
+ for index in range(model.num_bindings):
390
+ name = model.get_binding_name(index)
391
+ dtype = trt.nptype(model.get_binding_dtype(index))
392
+ if model.binding_is_input(index):
393
+ if -1 in tuple(model.get_binding_shape(index)): # dynamic
394
+ dynamic_input = True
395
+ context.set_binding_shape(index, tuple(model.get_profile_shape(0, index)[2]))
396
+ if dtype == np.float16:
397
+ fp16 = True
398
+ shape = tuple(context.get_binding_shape(index))
399
+ data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
400
+ bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
401
+ binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
402
+ batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size
403
+ elif coreml: # CoreML
404
+ LOGGER.info(f'Loading {w} for CoreML inference...')
405
+ import coremltools as ct
406
+ model = ct.models.MLModel(w)
407
+ else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
408
+ if saved_model: # SavedModel
409
+ LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
410
+ import tensorflow as tf
411
+ keras = False # assume TF1 saved_model
412
+ model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
413
+ elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
414
+ LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
415
+ import tensorflow as tf
416
+
417
+ def wrap_frozen_graph(gd, inputs, outputs):
418
+ x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped
419
+ ge = x.graph.as_graph_element
420
+ return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
421
+
422
+ gd = tf.Graph().as_graph_def() # graph_def
423
+ with open(w, 'rb') as f:
424
+ gd.ParseFromString(f.read())
425
+ frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs="Identity:0")
426
+ elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
427
+ try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
428
+ from tflite_runtime.interpreter import Interpreter, load_delegate
429
+ except ImportError:
430
+ import tensorflow as tf
431
+ Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
432
+ if edgetpu: # Edge TPU https://coral.ai/software/#edgetpu-runtime
433
+ LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
434
+ delegate = {
435
+ 'Linux': 'libedgetpu.so.1',
436
+ 'Darwin': 'libedgetpu.1.dylib',
437
+ 'Windows': 'edgetpu.dll'}[platform.system()]
438
+ interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
439
+ else: # Lite
440
+ LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
441
+ interpreter = Interpreter(model_path=w) # load TFLite model
442
+ interpreter.allocate_tensors() # allocate
443
+ input_details = interpreter.get_input_details() # inputs
444
+ output_details = interpreter.get_output_details() # outputs
445
+ elif tfjs:
446
+ raise Exception('ERROR: YOLOv5 TF.js inference is not supported')
447
+ else:
448
+ raise Exception(f'ERROR: {w} is not a supported format')
449
+ self.__dict__.update(locals()) # assign all variables to self
450
+
451
+ def forward(self, im, augment=False, visualize=False, val=False):
452
+ # YOLOv5 MultiBackend inference
453
+ b, ch, h, w = im.shape # batch, channel, height, width
454
+ if self.fp16 and im.dtype != torch.float16:
455
+ im = im.half() # to FP16
456
+
457
+ if self.pt: # PyTorch
458
+ y = self.model(im, augment=augment, visualize=visualize)[0]
459
+ elif self.jit: # TorchScript
460
+ y = self.model(im)[0]
461
+ elif self.dnn: # ONNX OpenCV DNN
462
+ im = im.cpu().numpy() # torch to numpy
463
+ self.net.setInput(im)
464
+ y = self.net.forward()
465
+ elif self.onnx: # ONNX Runtime
466
+ im = im.cpu().numpy() # torch to numpy
467
+ y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
468
+ elif self.xml: # OpenVINO
469
+ im = im.cpu().numpy() # FP32
470
+ y = self.executable_network([im])[self.output_layer]
471
+ elif self.engine: # TensorRT
472
+ if im.shape != self.bindings['images'].shape and self.dynamic_input:
473
+ self.context.set_binding_shape(self.model.get_binding_index('images'), im.shape) # reshape if dynamic
474
+ self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
475
+ assert im.shape == self.bindings['images'].shape, (
476
+ f"image shape {im.shape} exceeds model max shape {self.bindings['images'].shape}" if self.dynamic_input
477
+ else f"image shape {im.shape} does not match model shape {self.bindings['images'].shape}")
478
+ self.binding_addrs['images'] = int(im.data_ptr())
479
+ self.context.execute_v2(list(self.binding_addrs.values()))
480
+ y = self.bindings['output'].data
481
+ elif self.coreml: # CoreML
482
+ im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3)
483
+ im = Image.fromarray((im[0] * 255).astype('uint8'))
484
+ # im = im.resize((192, 320), Image.ANTIALIAS)
485
+ y = self.model.predict({'image': im}) # coordinates are xywh normalized
486
+ if 'confidence' in y:
487
+ box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
488
+ conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(float)
489
+ y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
490
+ else:
491
+ k = 'var_' + str(sorted(int(k.replace('var_', '')) for k in y)[-1]) # output key
492
+ y = y[k] # output
493
+ else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
494
+ im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3)
495
+ if self.saved_model: # SavedModel
496
+ y = (self.model(im, training=False) if self.keras else self.model(im)).numpy()
497
+ elif self.pb: # GraphDef
498
+ y = self.frozen_func(x=self.tf.constant(im)).numpy()
499
+ else: # Lite or Edge TPU
500
+ input, output = self.input_details[0], self.output_details[0]
501
+ int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model
502
+ if int8:
503
+ scale, zero_point = input['quantization']
504
+ im = (im / scale + zero_point).astype(np.uint8) # de-scale
505
+ self.interpreter.set_tensor(input['index'], im)
506
+ self.interpreter.invoke()
507
+ y = self.interpreter.get_tensor(output['index'])
508
+ if int8:
509
+ scale, zero_point = output['quantization']
510
+ y = (y.astype(np.float32) - zero_point) * scale # re-scale
511
+ y[..., :4] *= [w, h, w, h] # xywh normalized to pixels
512
+
513
+ if isinstance(y, np.ndarray):
514
+ y = torch.tensor(y, device=self.device)
515
+ return (y, []) if val else y
516
+
517
+ def warmup(self, imgsz=(1, 3, 640, 640)):
518
+ # Warmup model by running inference once
519
+ warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb
520
+ if any(warmup_types) and self.device.type != 'cpu':
521
+ im = torch.zeros(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
522
+ for _ in range(2 if self.jit else 1): #
523
+ self.forward(im) # warmup
524
+
525
+ @staticmethod
526
+ def model_type(p='path/to/model.pt'):
527
+ # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
528
+ from export import export_formats
529
+ suffixes = list(export_formats().Suffix) + ['.xml'] # export suffixes
530
+ check_suffix(p, suffixes) # checks
531
+ p = Path(p).name # eliminate trailing separators
532
+ pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, xml2 = (s in p for s in suffixes)
533
+ xml |= xml2 # *_openvino_model or *.xml
534
+ tflite &= not edgetpu # *.tflite
535
+ return pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs
536
+
537
+ @staticmethod
538
+ def _load_metadata(f='path/to/meta.yaml'):
539
+ # Load metadata from meta.yaml if it exists
540
+ with open(f, errors='ignore') as f:
541
+ d = yaml.safe_load(f)
542
+ return d['stride'], d['names'] # assign stride, names
543
+
544
+
545
+ class AutoShape(nn.Module):
546
+ # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
547
+ conf = 0.25 # NMS confidence threshold
548
+ iou = 0.45 # NMS IoU threshold
549
+ agnostic = False # NMS class-agnostic
550
+ multi_label = False # NMS multiple labels per box
551
+ classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
552
+ max_det = 1000 # maximum number of detections per image
553
+ amp = False # Automatic Mixed Precision (AMP) inference
554
+
555
+ def __init__(self, model, verbose=True):
556
+ super().__init__()
557
+ if verbose:
558
+ LOGGER.info('Adding AutoShape... ')
559
+ copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes
560
+ self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance
561
+ self.pt = not self.dmb or model.pt # PyTorch model
562
+ self.model = model.eval()
563
+
564
+ def _apply(self, fn):
565
+ # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
566
+ self = super()._apply(fn)
567
+ if self.pt:
568
+ m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
569
+ m.stride = fn(m.stride)
570
+ m.grid = list(map(fn, m.grid))
571
+ if isinstance(m.anchor_grid, list):
572
+ m.anchor_grid = list(map(fn, m.anchor_grid))
573
+ return self
574
+
575
+ @torch.no_grad()
576
+ def forward(self, imgs, size=640, augment=False, profile=False):
577
+ # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
578
+ # file: imgs = 'data/images/zidane.jpg' # str or PosixPath
579
+ # URI: = 'https://ultralytics.com/images/zidane.jpg'
580
+ # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
581
+ # PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
582
+ # numpy: = np.zeros((640,1280,3)) # HWC
583
+ # torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
584
+ # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
585
+
586
+ t = [time_sync()]
587
+ p = next(self.model.parameters()) if self.pt else torch.zeros(1, device=self.model.device) # for device, type
588
+ autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference
589
+ if isinstance(imgs, torch.Tensor): # torch
590
+ with amp.autocast(autocast):
591
+ return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference
592
+
593
+ # Pre-process
594
+ n, imgs = (len(imgs), list(imgs)) if isinstance(imgs, (list, tuple)) else (1, [imgs]) # number, list of images
595
+ shape0, shape1, files = [], [], [] # image and inference shapes, filenames
596
+ for i, im in enumerate(imgs):
597
+ f = f'image{i}' # filename
598
+ if isinstance(im, (str, Path)): # filename or uri
599
+ im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
600
+ im = np.asarray(exif_transpose(im))
601
+ elif isinstance(im, Image.Image): # PIL Image
602
+ im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
603
+ files.append(Path(f).with_suffix('.jpg').name)
604
+ if im.shape[0] < 5: # image in CHW
605
+ im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
606
+ im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3) # enforce 3ch input
607
+ s = im.shape[:2] # HWC
608
+ shape0.append(s) # image shape
609
+ g = (size / max(s)) # gain
610
+ shape1.append([y * g for y in s])
611
+ imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
612
+ shape1 = [make_divisible(x, self.stride) if self.pt else size for x in np.array(shape1).max(0)] # inf shape
613
+ x = [letterbox(im, shape1, auto=False)[0] for im in imgs] # pad
614
+ x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
615
+ x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
616
+ t.append(time_sync())
617
+
618
+ with amp.autocast(autocast):
619
+ # Inference
620
+ y = self.model(x, augment, profile) # forward
621
+ t.append(time_sync())
622
+
623
+ # Post-process
624
+ y = non_max_suppression(y if self.dmb else y[0],
625
+ self.conf,
626
+ self.iou,
627
+ self.classes,
628
+ self.agnostic,
629
+ self.multi_label,
630
+ max_det=self.max_det) # NMS
631
+ for i in range(n):
632
+ scale_coords(shape1, y[i][:, :4], shape0[i])
633
+
634
+ t.append(time_sync())
635
+ return Detections(imgs, y, files, t, self.names, x.shape)
636
+
637
+
638
+ class Detections:
639
+ # YOLOv5 detections class for inference results
640
+ def __init__(self, imgs, pred, files, times=(0, 0, 0, 0), names=None, shape=None):
641
+ super().__init__()
642
+ d = pred[0].device # device
643
+ gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs] # normalizations
644
+ self.imgs = imgs # list of images as numpy arrays
645
+ self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
646
+ self.names = names # class names
647
+ self.files = files # image filenames
648
+ self.times = times # profiling times
649
+ self.xyxy = pred # xyxy pixels
650
+ self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
651
+ self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
652
+ self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
653
+ self.n = len(self.pred) # number of images (batch size)
654
+ self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms)
655
+ self.s = shape # inference BCHW shape
656
+
657
+ def display(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
658
+ crops = []
659
+ for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
660
+ s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
661
+ if pred.shape[0]:
662
+ for c in pred[:, -1].unique():
663
+ n = (pred[:, -1] == c).sum() # detections per class
664
+ s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
665
+ if show or save or render or crop:
666
+ annotator = Annotator(im, example=str(self.names))
667
+ for *box, conf, cls in reversed(pred): # xyxy, confidence, class
668
+ label = f'{self.names[int(cls)]} {conf:.2f}'
669
+ if crop:
670
+ file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
671
+ crops.append({
672
+ 'box': box,
673
+ 'conf': conf,
674
+ 'cls': cls,
675
+ 'label': label,
676
+ 'im': save_one_box(box, im, file=file, save=save)})
677
+ else: # all others
678
+ annotator.box_label(box, label if labels else '', color=colors(cls))
679
+ im = annotator.im
680
+ else:
681
+ s += '(no detections)'
682
+
683
+ im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
684
+ if pprint:
685
+ print(s.rstrip(', '))
686
+ if show:
687
+ im.show(self.files[i]) # show
688
+ if save:
689
+ f = self.files[i]
690
+ im.save(save_dir / f) # save
691
+ if i == self.n - 1:
692
+ LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
693
+ if render:
694
+ self.imgs[i] = np.asarray(im)
695
+ if crop:
696
+ if save:
697
+ LOGGER.info(f'Saved results to {save_dir}\n')
698
+ return crops
699
+
700
+ def print(self):
701
+ self.display(pprint=True) # print results
702
+ print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)
703
+
704
+ def show(self, labels=True):
705
+ self.display(show=True, labels=labels) # show results
706
+
707
+ def save(self, labels=True, save_dir='runs/detect/exp'):
708
+ save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir
709
+ self.display(save=True, labels=labels, save_dir=save_dir) # save results
710
+
711
+ def crop(self, save=True, save_dir='runs/detect/exp'):
712
+ save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
713
+ return self.display(crop=True, save=save, save_dir=save_dir) # crop results
714
+
715
+ def render(self, labels=True):
716
+ self.display(render=True, labels=labels) # render results
717
+ return self.imgs
718
+
719
+ def pandas(self):
720
+ # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
721
+ new = copy(self) # return copy
722
+ ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
723
+ cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
724
+ for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
725
+ a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
726
+ setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
727
+ return new
728
+
729
+ def tolist(self):
730
+ # return a list of Detections objects, i.e. 'for result in results.tolist():'
731
+ r = range(self.n) # iterable
732
+ x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
733
+ # for d in x:
734
+ # for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
735
+ # setattr(d, k, getattr(d, k)[0]) # pop out of list
736
+ return x
737
+
738
+ def __len__(self):
739
+ return self.n # override len(results)
740
+
741
+ def __str__(self):
742
+ self.print() # override print(results)
743
+ return ''
744
+
745
+
746
+ class Classify(nn.Module):
747
+ # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
748
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
749
+ super().__init__()
750
+ self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
751
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
752
+ self.flat = nn.Flatten()
753
+
754
+ def forward(self, x):
755
+ z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
756
+ return self.flat(self.conv(z)) # flatten to x(b,c2)
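The _apply()/forward() methods above belong to the AutoShape wrapper in this vendored common.py; it accepts file paths, URLs, OpenCV/PIL/numpy images or torch tensors and returns the Detections object defined above. A minimal usage sketch, assuming a YOLOv5 model instance named `model` has already been loaded from this package and that `sample.jpg` exists (both are placeholders, not part of this commit):

import cv2

wrapped = AutoShape(model)                 # wrap an already-loaded detection model
im = cv2.imread('sample.jpg')[:, :, ::-1]  # BGR -> RGB HWC numpy, as documented in forward()
results = wrapped(im, size=640)            # returns a Detections instance

results.print()                            # per-image summary plus pre-process/inference/NMS times
df = results.pandas().xyxy[0]              # columns: xmin, ymin, xmax, ymax, confidence, class, name
crops = results.crop(save=False)           # list of dicts with 'box', 'conf', 'cls', 'label', 'im'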
asone/detectors/yolov5/yolov5/models/experimental.py ADDED
@@ -0,0 +1,56 @@
1
+ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
+ """
3
+ Experimental modules
4
+ """
5
+ import math
6
+
7
+ import numpy as np
8
+ import torch
9
+ import torch.nn as nn
10
+ from asone.detectors.yolov5.yolov5.utils.yolov5_utils import yolov5_in_syspath
11
+
12
+ class Ensemble(nn.ModuleList):
13
+ # Ensemble of models
14
+ def __init__(self):
15
+ super().__init__()
16
+
17
+ def forward(self, x, augment=False, profile=False, visualize=False):
18
+ y = [module(x, augment, profile, visualize)[0] for module in self]
19
+ # y = torch.stack(y).max(0)[0] # max ensemble
20
+ # y = torch.stack(y).mean(0) # mean ensemble
21
+ y = torch.cat(y, 1) # nms ensemble
22
+ return y, None # inference, train output
23
+
24
+
25
+ def attempt_load(weights, device=None, inplace=True, fuse=True):
26
+ # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
27
+ with yolov5_in_syspath():
28
+ from asone.detectors.yolov5.yolov5.models.yolo import Detect, Model
29
+
30
+ model = Ensemble()
31
+ for w in weights if isinstance(weights, list) else [weights]:
32
+ with yolov5_in_syspath():
33
+ ckpt = torch.load(w, map_location='cpu') # load
34
+ ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float() # FP32 model
35
+ model.append(ckpt.fuse().eval() if fuse else ckpt.eval()) # fused or un-fused model in eval mode
36
+
37
+ # Compatibility updates
38
+ for m in model.modules():
39
+ t = type(m)
40
+ if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
41
+ m.inplace = inplace # torch 1.7.0 compatibility
42
+ if t is Detect and not isinstance(m.anchor_grid, list):
43
+ delattr(m, 'anchor_grid')
44
+ setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
45
+ elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
46
+ m.recompute_scale_factor = None # torch 1.11.0 compatibility
47
+
48
+ if len(model) == 1:
49
+ return model[-1] # return model
50
+ print(f'Ensemble created with {weights}\n')
51
+ for k in 'names', 'nc', 'yaml':
52
+ setattr(model, k, getattr(model[0], k))
53
+ model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride # max stride
54
+ assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
55
+ return model # return ensemble
56
+
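attempt_load() above returns a single fused model for one weights file and an Ensemble for several. A quick sketch (the weight filenames are placeholders):

import torch

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = attempt_load('yolov5s.pt', device=device)                      # single model, eval mode, fused
ensemble = attempt_load(['yolov5s.pt', 'yolov5m.pt'], device=device)   # NMS ensemble of two models
with torch.no_grad():
    pred = model(torch.zeros(1, 3, 640, 640, device=device))           # dummy forward pass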
asone/detectors/yolov5/yolov5/models/general.py ADDED
@@ -0,0 +1,1036 @@
1
+ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
+ """
3
+ General utils
4
+ """
5
+
6
+ import contextlib
7
+ import glob
8
+ import inspect
9
+ import logging
10
+ import math
11
+ import os
12
+ import platform
13
+ import random
14
+ import re
15
+ import shutil
16
+ import signal
17
+ import threading
18
+ import time
19
+ import urllib
20
+ from datetime import datetime
21
+ from itertools import repeat
22
+ from multiprocessing.pool import ThreadPool
23
+ from pathlib import Path
24
+ from subprocess import check_output
25
+ from typing import Optional
26
+ from zipfile import ZipFile
27
+
28
+ import cv2
29
+ import numpy as np
30
+ import pandas as pd
31
+ import pkg_resources as pkg
32
+ import torch
33
+ import torchvision
34
+ import yaml
35
+
36
+ FILE = Path(__file__).resolve()
37
+ ROOT = FILE.parents[1] # YOLOv5 root directory
38
+ RANK = int(os.getenv('RANK', -1))
39
+
40
+ # Settings
41
+ DATASETS_DIR = ROOT.parent / 'datasets' # YOLOv5 datasets directory
42
+ NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads
43
+ AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true' # global auto-install mode
44
+ VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode
45
+ FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf
46
+
47
+ torch.set_printoptions(linewidth=320, precision=5, profile='long')
48
+ np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
49
+ pd.options.display.max_columns = 10
50
+ cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
51
+ os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS) # NumExpr max threads
52
+ os.environ['OMP_NUM_THREADS'] = '1' if platform.system() == 'darwin' else str(NUM_THREADS) # OpenMP (PyTorch and SciPy)
53
+
54
+
55
+ def is_kaggle():
56
+ # Is environment a Kaggle Notebook?
57
+ try:
58
+ assert os.environ.get('PWD') == '/kaggle/working'
59
+ assert os.environ.get('KAGGLE_URL_BASE') == 'https://www.kaggle.com'
60
+ return True
61
+ except AssertionError:
62
+ return False
63
+
64
+
65
+ def is_writeable(dir, test=False):
66
+ # Return True if directory has write permissions, test opening a file with write permissions if test=True
67
+ if not test:
68
+ return os.access(dir, os.R_OK) # possible issues on Windows
69
+ file = Path(dir) / 'tmp.txt'
70
+ try:
71
+ with open(file, 'w'): # open file with write permissions
72
+ pass
73
+ file.unlink() # remove file
74
+ return True
75
+ except OSError:
76
+ return False
77
+
78
+
79
+ def set_logging(name=None, verbose=VERBOSE):
80
+ # Sets level and returns logger
81
+ if is_kaggle():
82
+ for h in logging.root.handlers:
83
+ logging.root.removeHandler(h) # remove all handlers associated with the root logger object
84
+ rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings
85
+ level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR
86
+ log = logging.getLogger(name)
87
+ log.setLevel(level)
88
+ handler = logging.StreamHandler()
89
+ handler.setFormatter(logging.Formatter("%(message)s"))
90
+ handler.setLevel(level)
91
+ log.addHandler(handler)
92
+
93
+
94
+ set_logging() # run before defining LOGGER
95
+ LOGGER = logging.getLogger("yolov5") # define globally (used in train.py, val.py, detect.py, etc.)
96
+
97
+
98
+ def user_config_dir(dir='Ultralytics', env_var='YOLOV5_CONFIG_DIR'):
99
+ # Return path of user configuration directory. Prefer environment variable if exists. Make dir if required.
100
+ env = os.getenv(env_var)
101
+ if env:
102
+ path = Path(env) # use environment variable
103
+ else:
104
+ cfg = {'Windows': 'AppData/Roaming', 'Linux': '.config', 'Darwin': 'Library/Application Support'} # 3 OS dirs
105
+ path = Path.home() / cfg.get(platform.system(), '') # OS-specific config dir
106
+ path = (path if is_writeable(path) else Path('/tmp')) / dir # GCP and AWS lambda fix, only /tmp is writeable
107
+ path.mkdir(exist_ok=True) # make if required
108
+ return path
109
+
110
+
111
+ CONFIG_DIR = user_config_dir() # Ultralytics settings dir
112
+
113
+
114
+ class Profile(contextlib.ContextDecorator):
115
+ # Usage: @Profile() decorator or 'with Profile():' context manager
116
+ def __enter__(self):
117
+ self.start = time.time()
118
+
119
+ def __exit__(self, type, value, traceback):
120
+ print(f'Profile results: {time.time() - self.start:.5f}s')
121
+
122
+
123
+ class Timeout(contextlib.ContextDecorator):
124
+ # Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager
125
+ def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True):
126
+ self.seconds = int(seconds)
127
+ self.timeout_message = timeout_msg
128
+ self.suppress = bool(suppress_timeout_errors)
129
+
130
+ def _timeout_handler(self, signum, frame):
131
+ raise TimeoutError(self.timeout_message)
132
+
133
+ def __enter__(self):
134
+ if platform.system() != 'Windows': # not supported on Windows
135
+ signal.signal(signal.SIGALRM, self._timeout_handler) # Set handler for SIGALRM
136
+ signal.alarm(self.seconds) # start countdown for SIGALRM to be raised
137
+
138
+ def __exit__(self, exc_type, exc_val, exc_tb):
139
+ if platform.system() != 'Windows':
140
+ signal.alarm(0) # Cancel SIGALRM if it's scheduled
141
+ if self.suppress and exc_type is TimeoutError: # Suppress TimeoutError
142
+ return True
143
+
144
+
145
+ class WorkingDirectory(contextlib.ContextDecorator):
146
+ # Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager
147
+ def __init__(self, new_dir):
148
+ self.dir = new_dir # new dir
149
+ self.cwd = Path.cwd().resolve() # current dir
150
+
151
+ def __enter__(self):
152
+ os.chdir(self.dir)
153
+
154
+ def __exit__(self, exc_type, exc_val, exc_tb):
155
+ os.chdir(self.cwd)
156
+
157
+
158
+ def try_except(func):
159
+ # try-except function. Usage: @try_except decorator
160
+ def handler(*args, **kwargs):
161
+ try:
162
+ func(*args, **kwargs)
163
+ except Exception as e:
164
+ print(e)
165
+
166
+ return handler
167
+
168
+
169
+ def threaded(func):
170
+ # Multi-threads a target function and returns thread. Usage: @threaded decorator
171
+ def wrapper(*args, **kwargs):
172
+ thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
173
+ thread.start()
174
+ return thread
175
+
176
+ return wrapper
177
+
178
+
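Profile, Timeout, WorkingDirectory, try_except and threaded defined above are usable as decorators or context managers. An illustrative sketch (the sleep body and the /tmp path are placeholders):

import time

with Profile():                                          # prints elapsed seconds on exit
    with Timeout(5, timeout_msg='step took too long'):   # SIGALRM based, a no-op on Windows
        with WorkingDirectory('/tmp'):                   # chdir in, chdir back on exit
            time.sleep(1)                                # stand-in for real work

@threaded        # the decorated call starts a daemon thread and returns the Thread object
@try_except      # exceptions inside the thread are printed instead of propagating
def background_job():
    time.sleep(1)

background_job().join()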
179
+ def methods(instance):
180
+ # Get class/instance methods
181
+ return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith("__")]
182
+
183
+
184
+ def print_args(args: Optional[dict] = None, show_file=True, show_fcn=False):
185
+ # Print function arguments (optional args dict)
186
+ x = inspect.currentframe().f_back # previous frame
187
+ file, _, fcn, _, _ = inspect.getframeinfo(x)
188
+ if args is None: # get args automatically
189
+ args, _, _, frm = inspect.getargvalues(x)
190
+ args = {k: v for k, v in frm.items() if k in args}
191
+ s = (f'{Path(file).stem}: ' if show_file else '') + (f'{fcn}: ' if show_fcn else '')
192
+ LOGGER.info(colorstr(s) + ', '.join(f'{k}={v}' for k, v in args.items()))
193
+
194
+
195
+ def init_seeds(seed=0, deterministic=False):
196
+ # Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html
197
+ # cudnn seed 0 settings are slower and more reproducible, else faster and less reproducible
198
+ import torch.backends.cudnn as cudnn
199
+
200
+ if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213
201
+ torch.use_deterministic_algorithms(True)
202
+ os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
203
+ os.environ['PYTHONHASHSEED'] = str(seed)
204
+
205
+ random.seed(seed)
206
+ np.random.seed(seed)
207
+ torch.manual_seed(seed)
208
+ cudnn.benchmark, cudnn.deterministic = (False, True) if seed == 0 else (True, False)
209
+ torch.cuda.manual_seed(seed)
210
+ torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe
211
+
212
+
213
+ def intersect_dicts(da, db, exclude=()):
214
+ # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
215
+ return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
216
+
217
+
218
+ def get_latest_run(search_dir='.'):
219
+ # Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
220
+ last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
221
+ return max(last_list, key=os.path.getctime) if last_list else ''
222
+
223
+
224
+ def is_docker() -> bool:
225
+ """Check if the process runs inside a docker container."""
226
+ if Path("/.dockerenv").exists():
227
+ return True
228
+ try: # check if docker is in control groups
229
+ with open("/proc/self/cgroup") as file:
230
+ return any("docker" in line for line in file)
231
+ except OSError:
232
+ return False
233
+
234
+
235
+ def is_colab():
236
+ # Is environment a Google Colab instance?
237
+ try:
238
+ import google.colab
239
+ return True
240
+ except ImportError:
241
+ return False
242
+
243
+
244
+ def is_pip():
245
+ # Is file in a pip package?
246
+ return 'site-packages' in Path(__file__).resolve().parts
247
+
248
+
249
+ def is_ascii(s=''):
250
+ # Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7)
251
+ s = str(s) # convert list, tuple, None, etc. to str
252
+ return len(s.encode().decode('ascii', 'ignore')) == len(s)
253
+
254
+
255
+ def is_chinese(s='人工智能'):
256
+ # Is string composed of any Chinese characters?
257
+ return bool(re.search('[\u4e00-\u9fff]', str(s)))
258
+
259
+
260
+ def emojis(str=''):
261
+ # Return platform-dependent emoji-safe version of string
262
+ return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
263
+
264
+
265
+ def file_age(path=__file__):
266
+ # Return days since last file update
267
+ dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta
268
+ return dt.days # + dt.seconds / 86400 # fractional days
269
+
270
+
271
+ def file_date(path=__file__):
272
+ # Return human-readable file modification date, i.e. '2021-3-26'
273
+ t = datetime.fromtimestamp(Path(path).stat().st_mtime)
274
+ return f'{t.year}-{t.month}-{t.day}'
275
+
276
+
277
+ def file_size(path):
278
+ # Return file/dir size (MB)
279
+ mb = 1 << 20 # bytes to MiB (1024 ** 2)
280
+ path = Path(path)
281
+ if path.is_file():
282
+ return path.stat().st_size / mb
283
+ elif path.is_dir():
284
+ return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / mb
285
+ else:
286
+ return 0.0
287
+
288
+
289
+ def check_online():
290
+ # Check internet connectivity
291
+ import socket
292
+ try:
293
+ socket.create_connection(("1.1.1.1", 443), 5) # check host accessibility
294
+ return True
295
+ except OSError:
296
+ return False
297
+
298
+
299
+ def git_describe(path=ROOT): # path must be a directory
300
+ # Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
301
+ try:
302
+ assert (Path(path) / '.git').is_dir()
303
+ return check_output(f'git -C {path} describe --tags --long --always', shell=True).decode()[:-1]
304
+ except Exception:
305
+ return ''
306
+
307
+
308
+ @try_except
309
+ @WorkingDirectory(ROOT)
310
+ def check_git_status(repo='ultralytics/yolov5'):
311
+ # YOLOv5 status check, recommend 'git pull' if code is out of date
312
+ url = f'https://github.com/{repo}'
313
+ msg = f', for updates see {url}'
314
+ s = colorstr('github: ') # string
315
+ assert Path('.git').exists(), s + 'skipping check (not a git repository)' + msg
316
+ assert check_online(), s + 'skipping check (offline)' + msg
317
+
318
+ splits = re.split(pattern=r'\s', string=check_output('git remote -v', shell=True).decode())
319
+ matches = [repo in s for s in splits]
320
+ if any(matches):
321
+ remote = splits[matches.index(True) - 1]
322
+ else:
323
+ remote = 'ultralytics'
324
+ check_output(f'git remote add {remote} {url}', shell=True)
325
+ check_output(f'git fetch {remote}', shell=True, timeout=5) # git fetch
326
+ branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out
327
+ n = int(check_output(f'git rev-list {branch}..{remote}/master --count', shell=True)) # commits behind
328
+ if n > 0:
329
+ pull = 'git pull' if remote == 'origin' else f'git pull {remote} master'
330
+ s += f"⚠️ YOLOv5 is out of date by {n} commit{'s' * (n > 1)}. Use `{pull}` or `git clone {url}` to update."
331
+ else:
332
+ s += f'up to date with {url} ✅'
333
+ LOGGER.info(emojis(s)) # emoji-safe
334
+
335
+
336
+ def check_python(minimum='3.7.0'):
337
+ # Check current python version vs. required python version
338
+ check_version(platform.python_version(), minimum, name='Python ', hard=True)
339
+
340
+
341
+ def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=False, verbose=False):
342
+ # Check version vs. required version
343
+ current, minimum = (pkg.parse_version(x) for x in (current, minimum))
344
+ result = (current == minimum) if pinned else (current >= minimum) # bool
345
+ s = f'{name}{minimum} required by YOLOv5, but {name}{current} is currently installed' # string
346
+ if hard:
347
+ assert result, s # assert min requirements met
348
+ if verbose and not result:
349
+ LOGGER.warning(s)
350
+ return result
351
+
352
+
353
+ @try_except
354
+ def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=()):
355
+ # Check installed dependencies meet requirements (pass *.txt file or list of packages)
356
+ prefix = colorstr('red', 'bold', 'requirements:')
357
+ check_python() # check python version
358
+ if isinstance(requirements, (str, Path)): # requirements.txt file
359
+ file = Path(requirements)
360
+ assert file.exists(), f"{prefix} {file.resolve()} not found, check failed."
361
+ with file.open() as f:
362
+ requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude]
363
+ else: # list or tuple of packages
364
+ requirements = [x for x in requirements if x not in exclude]
365
+
366
+ n = 0 # number of packages updates
367
+ for i, r in enumerate(requirements):
368
+ try:
369
+ pkg.require(r)
370
+ except Exception: # DistributionNotFound or VersionConflict if requirements not met
371
+ s = f"{prefix} {r} not found and is required by YOLOv5"
372
+ if install and AUTOINSTALL: # check environment variable
373
+ LOGGER.info(f"{s}, attempting auto-update...")
374
+ try:
375
+ assert check_online(), f"'pip install {r}' skipped (offline)"
376
+ LOGGER.info(check_output(f'pip install "{r}" {cmds[i] if cmds else ""}', shell=True).decode())
377
+ n += 1
378
+ except Exception as e:
379
+ LOGGER.warning(f'{prefix} {e}')
380
+ else:
381
+ LOGGER.info(f'{s}. Please install and rerun your command.')
382
+
383
+ if n: # if packages updated
384
+ source = file.resolve() if 'file' in locals() else requirements
385
+ s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
386
+ f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
387
+ LOGGER.info(emojis(s))
388
+
389
+
390
+ def check_img_size(imgsz, s=32, floor=0):
391
+ # Verify image size is a multiple of stride s in each dimension
392
+ if isinstance(imgsz, int): # integer i.e. img_size=640
393
+ new_size = max(make_divisible(imgsz, int(s)), floor)
394
+ else: # list i.e. img_size=[640, 480]
395
+ imgsz = list(imgsz) # convert to list if tuple
396
+ new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz]
397
+ if new_size != imgsz:
398
+ LOGGER.warning(f'WARNING: --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
399
+ return new_size
400
+
401
+
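check_img_size() rounds the requested size up to the nearest multiple of the model stride, so for a stride-32 model:

check_img_size(640, s=32)         # -> 640, already a multiple of 32
check_img_size([641, 479], s=32)  # -> [672, 480], logged with a warning about the adjustment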
402
+ def check_imshow():
403
+ # Check if environment supports image displays
404
+ try:
405
+ assert not is_docker(), 'cv2.imshow() is disabled in Docker environments'
406
+ assert not is_colab(), 'cv2.imshow() is disabled in Google Colab environments'
407
+ cv2.imshow('test', np.zeros((1, 1, 3)))
408
+ cv2.waitKey(1)
409
+ cv2.destroyAllWindows()
410
+ cv2.waitKey(1)
411
+ return True
412
+ except Exception as e:
413
+ LOGGER.warning(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
414
+ return False
415
+
416
+
417
+ def check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):
418
+ # Check file(s) for acceptable suffix
419
+ if file and suffix:
420
+ if isinstance(suffix, str):
421
+ suffix = [suffix]
422
+ for f in file if isinstance(file, (list, tuple)) else [file]:
423
+ s = Path(f).suffix.lower() # file suffix
424
+ if len(s):
425
+ assert s in suffix, f"{msg}{f} acceptable suffix is {suffix}"
426
+
427
+
428
+ def check_yaml(file, suffix=('.yaml', '.yml')):
429
+ # Search/download YAML file (if necessary) and return path, checking suffix
430
+ return check_file(file, suffix)
431
+
432
+
433
+ def check_file(file, suffix=''):
434
+ # Search/download file (if necessary) and return path
435
+ check_suffix(file, suffix) # optional
436
+ file = str(file) # convert to str()
437
+ if Path(file).is_file() or not file: # exists
438
+ return file
439
+ elif file.startswith(('http:/', 'https:/')): # download
440
+ url = file # warning: Pathlib turns :// -> :/
441
+ file = Path(urllib.parse.unquote(file).split('?')[0]).name # '%2F' to '/', split https://url.com/file.txt?auth
442
+ if Path(file).is_file():
443
+ LOGGER.info(f'Found {url} locally at {file}') # file already exists
444
+ else:
445
+ LOGGER.info(f'Downloading {url} to {file}...')
446
+ torch.hub.download_url_to_file(url, file)
447
+ assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check
448
+ return file
449
+ else: # search
450
+ files = []
451
+ for d in 'data', 'models', 'utils': # search directories
452
+ files.extend(glob.glob(str(ROOT / d / '**' / file), recursive=True)) # find file
453
+ assert len(files), f'File not found: {file}' # assert file was found
454
+ assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique
455
+ return files[0] # return file
456
+
457
+
458
+ def check_font(font=FONT, progress=False):
459
+ # Download font to CONFIG_DIR if necessary
460
+ font = Path(font)
461
+ file = CONFIG_DIR / font.name
462
+ if not font.exists() and not file.exists():
463
+ url = "https://ultralytics.com/assets/" + font.name
464
+ LOGGER.info(f'Downloading {url} to {file}...')
465
+ torch.hub.download_url_to_file(url, str(file), progress=progress)
466
+
467
+
468
+ def check_dataset(data, autodownload=True):
469
+ # Download, check and/or unzip dataset if not found locally
470
+
471
+ # Download (optional)
472
+ extract_dir = ''
473
+ if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip
474
+ download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False, threads=1)
475
+ data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml'))
476
+ extract_dir, autodownload = data.parent, False
477
+
478
+ # Read yaml (optional)
479
+ if isinstance(data, (str, Path)):
480
+ with open(data, errors='ignore') as f:
481
+ data = yaml.safe_load(f) # dictionary
482
+
483
+ # Checks
484
+ for k in 'train', 'val', 'nc':
485
+ assert k in data, emojis(f"data.yaml '{k}:' field missing ❌")
486
+ if 'names' not in data:
487
+ LOGGER.warning(emojis("data.yaml 'names:' field missing ⚠️, assigning default names 'class0', 'class1', etc."))
488
+ data['names'] = [f'class{i}' for i in range(data['nc'])] # default names
489
+
490
+ # Resolve paths
491
+ path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.'
492
+ if not path.is_absolute():
493
+ path = (ROOT / path).resolve()
494
+ for k in 'train', 'val', 'test':
495
+ if data.get(k): # prepend path
496
+ data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
497
+
498
+ # Parse yaml
499
+ train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
500
+ if val:
501
+ val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
502
+ if not all(x.exists() for x in val):
503
+ LOGGER.info(emojis('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]))
504
+ if not s or not autodownload:
505
+ raise Exception(emojis('Dataset not found ❌'))
506
+ t = time.time()
507
+ root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
508
+ if s.startswith('http') and s.endswith('.zip'): # URL
509
+ f = Path(s).name # filename
510
+ LOGGER.info(f'Downloading {s} to {f}...')
511
+ torch.hub.download_url_to_file(s, f)
512
+ Path(root).mkdir(parents=True, exist_ok=True) # create root
513
+ ZipFile(f).extractall(path=root) # unzip
514
+ Path(f).unlink() # remove zip
515
+ r = None # success
516
+ elif s.startswith('bash '): # bash script
517
+ LOGGER.info(f'Running {s} ...')
518
+ r = os.system(s)
519
+ else: # python script
520
+ r = exec(s, {'yaml': data}) # return None
521
+ dt = f'({round(time.time() - t, 1)}s)'
522
+ s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌"
523
+ LOGGER.info(emojis(f"Dataset download {s}"))
524
+ check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts
525
+ return data # dictionary
526
+
527
+
528
+ def check_amp(model):
529
+ # Check PyTorch Automatic Mixed Precision (AMP) functionality. Return True on correct operation
530
+ from asone.detectors.yolov5.yolov5.models.common import AutoShape, DetectMultiBackend
531
+
532
+ def amp_allclose(model, im):
533
+ # All close FP32 vs AMP results
534
+ m = AutoShape(model, verbose=False) # model
535
+ a = m(im).xywhn[0] # FP32 inference
536
+ m.amp = True
537
+ b = m(im).xywhn[0] # AMP inference
538
+ return a.shape == b.shape and torch.allclose(a, b, atol=0.1) # close to 10% absolute tolerance
539
+
540
+ prefix = colorstr('AMP: ')
541
+ device = next(model.parameters()).device # get model device
542
+ if device.type == 'cpu':
543
+ return False # AMP disabled on CPU
544
+ f = ROOT / 'data' / 'images' / 'bus.jpg' # image to check
545
+ im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3))
546
+ try:
547
+ assert amp_allclose(model, im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im)
548
+ LOGGER.info(emojis(f'{prefix}checks passed ✅'))
549
+ return True
550
+ except Exception:
551
+ help_url = 'https://github.com/ultralytics/yolov5/issues/7908'
552
+ LOGGER.warning(emojis(f'{prefix}checks failed ❌, disabling Automatic Mixed Precision. See {help_url}'))
553
+ return False
554
+
555
+
556
+ def url2file(url):
557
+ # Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt
558
+ url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/
559
+ return Path(urllib.parse.unquote(url)).name.split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth
560
+
561
+
562
+ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry=3):
563
+ # Multi-threaded file download and unzip function, used in data.yaml for autodownload
564
+ def download_one(url, dir):
565
+ # Download 1 file
566
+ success = True
567
+ f = dir / Path(url).name # filename
568
+ if Path(url).is_file(): # exists in current path
569
+ Path(url).rename(f) # move to dir
570
+ elif not f.exists():
571
+ LOGGER.info(f'Downloading {url} to {f}...')
572
+ for i in range(retry + 1):
573
+ if curl:
574
+ s = 'sS' if threads > 1 else '' # silent
575
+ r = os.system(f'curl -{s}L "{url}" -o "{f}" --retry 9 -C -') # curl download with retry, continue
576
+ success = r == 0
577
+ else:
578
+ torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download
579
+ success = f.is_file()
580
+ if success:
581
+ break
582
+ elif i < retry:
583
+ LOGGER.warning(f'Download failure, retrying {i + 1}/{retry} {url}...')
584
+ else:
585
+ LOGGER.warning(f'Failed to download {url}...')
586
+
587
+ if unzip and success and f.suffix in ('.zip', '.gz'):
588
+ LOGGER.info(f'Unzipping {f}...')
589
+ if f.suffix == '.zip':
590
+ ZipFile(f).extractall(path=dir) # unzip
591
+ elif f.suffix == '.gz':
592
+ os.system(f'tar xfz {f} --directory {f.parent}') # unzip
593
+ if delete:
594
+ f.unlink() # remove zip
595
+
596
+ dir = Path(dir)
597
+ dir.mkdir(parents=True, exist_ok=True) # make directory
598
+ if threads > 1:
599
+ pool = ThreadPool(threads)
600
+ pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
601
+ pool.close()
602
+ pool.join()
603
+ else:
604
+ for u in [url] if isinstance(url, (str, Path)) else url:
605
+ download_one(u, dir)
606
+
607
+
608
+ def make_divisible(x, divisor):
609
+ # Returns nearest x divisible by divisor
610
+ if isinstance(divisor, torch.Tensor):
611
+ divisor = int(divisor.max()) # to int
612
+ return math.ceil(x / divisor) * divisor
613
+
614
+
615
+ def clean_str(s):
616
+ # Cleans a string by replacing special characters with underscore _
617
+ return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
618
+
619
+
620
+ def one_cycle(y1=0.0, y2=1.0, steps=100):
621
+ # lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf
622
+ return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1
623
+
624
+
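make_divisible() and one_cycle() are small numeric helpers; two quick examples:

make_divisible(97, 32)                     # -> 128, rounded up to the next multiple of 32
lf = one_cycle(1.0, 0.1, steps=100)        # cosine ramp from 1.0 down to 0.1 over 100 steps
[round(lf(x), 3) for x in (0, 50, 100)]    # -> [1.0, 0.55, 0.1]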
625
+ def colorstr(*input):
626
+ # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world')
627
+ *args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string
628
+ colors = {
629
+ 'black': '\033[30m', # basic colors
630
+ 'red': '\033[31m',
631
+ 'green': '\033[32m',
632
+ 'yellow': '\033[33m',
633
+ 'blue': '\033[34m',
634
+ 'magenta': '\033[35m',
635
+ 'cyan': '\033[36m',
636
+ 'white': '\033[37m',
637
+ 'bright_black': '\033[90m', # bright colors
638
+ 'bright_red': '\033[91m',
639
+ 'bright_green': '\033[92m',
640
+ 'bright_yellow': '\033[93m',
641
+ 'bright_blue': '\033[94m',
642
+ 'bright_magenta': '\033[95m',
643
+ 'bright_cyan': '\033[96m',
644
+ 'bright_white': '\033[97m',
645
+ 'end': '\033[0m', # misc
646
+ 'bold': '\033[1m',
647
+ 'underline': '\033[4m'}
648
+ return ''.join(colors[x] for x in args) + f'{string}' + colors['end']
649
+
650
+
651
+ def labels_to_class_weights(labels, nc=80):
652
+ # Get class weights (inverse frequency) from training labels
653
+ if labels[0] is None: # no labels loaded
654
+ return torch.Tensor()
655
+
656
+ labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO
657
+ classes = labels[:, 0].astype(int) # labels = [class xywh]
658
+ weights = np.bincount(classes, minlength=nc) # occurrences per class
659
+
660
+ # Prepend gridpoint count (for uCE training)
661
+ # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image
662
+ # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start
663
+
664
+ weights[weights == 0] = 1 # replace empty bins with 1
665
+ weights = 1 / weights # number of targets per class
666
+ weights /= weights.sum() # normalize
667
+ return torch.from_numpy(weights).float()
668
+
669
+
670
+ def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
671
+ # Produces image weights based on class_weights and image contents
672
+ # Usage: index = random.choices(range(n), weights=image_weights, k=1) # weighted image sample
673
+ class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels])
674
+ return (class_weights.reshape(1, nc) * class_counts).sum(1)
675
+
676
+
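labels_to_class_weights() and labels_to_image_weights() above turn a list of per-image label arrays (class, x, y, w, h per row) into inverse-frequency class weights and per-image sampling weights. A toy example (the label values are made up):

import numpy as np

labels = [np.array([[0, .5, .5, .2, .2], [1, .3, .3, .1, .1]]),   # image 1: one class-0 and one class-1 object
          np.array([[0, .7, .7, .4, .4]])]                        # image 2: one class-0 object
cw = labels_to_class_weights(labels, nc=2)                        # tensor([0.3333, 0.6667])
iw = labels_to_image_weights(labels, nc=2, class_weights=cw.numpy())  # array([1.0, 0.3333])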
677
+ def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
678
+ # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
679
+ # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
680
+ # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
681
+ # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
682
+ # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet
683
+ return [
684
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
685
+ 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
686
+ 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
687
+
688
+
689
+ def xyxy2xywh(x):
690
+ # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
691
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
692
+ y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
693
+ y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
694
+ y[:, 2] = x[:, 2] - x[:, 0] # width
695
+ y[:, 3] = x[:, 3] - x[:, 1] # height
696
+ return y
697
+
698
+
699
+ def xywh2xyxy(x):
700
+ # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
701
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
702
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
703
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
704
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
705
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
706
+ return y
707
+
708
+
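xyxy2xywh() and xywh2xyxy() above are exact inverses of each other on pixel boxes, e.g.:

import numpy as np

boxes_xyxy = np.array([[10., 20., 50., 80.]])    # x1, y1, x2, y2
boxes_xywh = xyxy2xywh(boxes_xyxy)               # -> [[30., 50., 40., 60.]] centre x, centre y, w, h
np.allclose(xywh2xyxy(boxes_xywh), boxes_xyxy)   # True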
709
+ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
710
+ # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
711
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
712
+ y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x
713
+ y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y
714
+ y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x
715
+ y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y
716
+ return y
717
+
718
+
719
+ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
720
+ # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
721
+ if clip:
722
+ clip_coords(x, (h - eps, w - eps)) # warning: inplace clip
723
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
724
+ y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center
725
+ y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center
726
+ y[:, 2] = (x[:, 2] - x[:, 0]) / w # width
727
+ y[:, 3] = (x[:, 3] - x[:, 1]) / h # height
728
+ return y
729
+
730
+
731
+ def xyn2xy(x, w=640, h=640, padw=0, padh=0):
732
+ # Convert normalized segments into pixel segments, shape (n,2)
733
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
734
+ y[:, 0] = w * x[:, 0] + padw # top left x
735
+ y[:, 1] = h * x[:, 1] + padh # top left y
736
+ return y
737
+
738
+
739
+ def segment2box(segment, width=640, height=640):
740
+ # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)
741
+ x, y = segment.T # segment xy
742
+ inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
743
+ x, y, = x[inside], y[inside]
744
+ return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy
745
+
746
+
747
+ def segments2boxes(segments):
748
+ # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
749
+ boxes = []
750
+ for s in segments:
751
+ x, y = s.T # segment xy
752
+ boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy
753
+ return xyxy2xywh(np.array(boxes)) # cls, xywh
754
+
755
+
756
+ def resample_segments(segments, n=1000):
757
+ # Up-sample an (n,2) segment
758
+ for i, s in enumerate(segments):
759
+ s = np.concatenate((s, s[0:1, :]), axis=0)
760
+ x = np.linspace(0, len(s) - 1, n)
761
+ xp = np.arange(len(s))
762
+ segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy
763
+ return segments
764
+
765
+
766
+ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
767
+ # Rescale coords (xyxy) from img1_shape to img0_shape
768
+ if ratio_pad is None: # calculate from img0_shape
769
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
770
+ pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
771
+ else:
772
+ gain = ratio_pad[0][0]
773
+ pad = ratio_pad[1]
774
+
775
+ coords[:, [0, 2]] -= pad[0] # x padding
776
+ coords[:, [1, 3]] -= pad[1] # y padding
777
+ coords[:, :4] /= gain
778
+ clip_coords(coords, img0_shape)
779
+ return coords
780
+
781
+
782
+ def clip_coords(boxes, shape):
783
+ # Clip xyxy bounding boxes to image shape (height, width)
784
+ if isinstance(boxes, torch.Tensor): # faster individually
785
+ boxes[:, 0].clamp_(0, shape[1]) # x1
786
+ boxes[:, 1].clamp_(0, shape[0]) # y1
787
+ boxes[:, 2].clamp_(0, shape[1]) # x2
788
+ boxes[:, 3].clamp_(0, shape[0]) # y2
789
+ else: # np.array (faster grouped)
790
+ boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2
791
+ boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
792
+
793
+
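scale_coords() maps boxes predicted on the letterboxed inference image back to original-image pixels, and clip_coords() keeps them inside the frame. A small sketch with illustrative shapes:

import torch

det = torch.tensor([[100., 50., 300., 200., 0.9, 0.]])   # xyxy, conf, cls on a 384x640 letterboxed input
scale_coords((384, 640), det[:, :4], (720, 1280))          # rescales in place and clips to the 720x1280 frame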
794
+ def non_max_suppression(prediction,
795
+ conf_thres=0.25,
796
+ iou_thres=0.45,
797
+ classes=None,
798
+ agnostic=False,
799
+ multi_label=False,
800
+ labels=(),
801
+ max_det=300):
802
+ """Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes
803
+
804
+ Returns:
805
+ list of detections, on (n,6) tensor per image [xyxy, conf, cls]
806
+ """
807
+
808
+ bs = prediction.shape[0] # batch size
809
+ nc = prediction.shape[2] - 5 # number of classes
810
+ xc = prediction[..., 4] > conf_thres # candidates
811
+
812
+ # Checks
813
+ assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
814
+ assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
815
+
816
+ # Settings
817
+ # min_wh = 2 # (pixels) minimum box width and height
818
+ max_wh = 7680 # (pixels) maximum box width and height
819
+ max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
820
+ time_limit = 0.3 + 0.03 * bs # seconds to quit after
821
+ redundant = True # require redundant detections
822
+ multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
823
+ merge = False # use merge-NMS
824
+
825
+ t = time.time()
826
+ output = [torch.zeros((0, 6), device=prediction.device)] * bs
827
+ for xi, x in enumerate(prediction): # image index, image inference
828
+ # Apply constraints
829
+ # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
830
+ x = x[xc[xi]] # confidence
831
+
832
+ # Cat apriori labels if autolabelling
833
+ if labels and len(labels[xi]):
834
+ lb = labels[xi]
835
+ v = torch.zeros((len(lb), nc + 5), device=x.device)
836
+ v[:, :4] = lb[:, 1:5] # box
837
+ v[:, 4] = 1.0 # conf
838
+ v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls
839
+ x = torch.cat((x, v), 0)
840
+
841
+ # If none remain process next image
842
+ if not x.shape[0]:
843
+ continue
844
+
845
+ # Compute conf
846
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
847
+
848
+ # Box (center x, center y, width, height) to (x1, y1, x2, y2)
849
+ box = xywh2xyxy(x[:, :4])
850
+
851
+ # Detections matrix nx6 (xyxy, conf, cls)
852
+ if multi_label:
853
+ i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
854
+ x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
855
+ else: # best class only
856
+ conf, j = x[:, 5:].max(1, keepdim=True)
857
+ x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
858
+
859
+ # Filter by class
860
+ if classes is not None:
861
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
862
+
863
+ # Apply finite constraint
864
+ # if not torch.isfinite(x).all():
865
+ # x = x[torch.isfinite(x).all(1)]
866
+
867
+ # Check shape
868
+ n = x.shape[0] # number of boxes
869
+ if not n: # no boxes
870
+ continue
871
+ elif n > max_nms: # excess boxes
872
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
873
+
874
+ # Batched NMS
875
+ c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
876
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
877
+ i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
878
+ if i.shape[0] > max_det: # limit detections
879
+ i = i[:max_det]
880
+ if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
881
+ # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
882
+ iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
883
+ weights = iou * scores[None] # box weights
884
+ x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
885
+ if redundant:
886
+ i = i[iou.sum(1) > 1] # require redundancy
887
+
888
+ output[xi] = x[i]
889
+ if (time.time() - t) > time_limit:
890
+ LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded')
891
+ break # time limit exceeded
892
+
893
+ return output
894
+
895
+
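non_max_suppression() expects the raw (batch, boxes, 5 + nc) model output and returns one (n, 6) tensor of [x1, y1, x2, y2, conf, cls] per image. A sketch with random predictions (real usage feeds the model output directly):

import torch

pred = torch.rand(1, 1000, 85)       # batch of 1, 1000 candidate boxes, 80 classes
dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, classes=[0], max_det=100)
for d in dets:                       # one (n, 6) tensor per image
    for *xyxy, conf, cls in d:
        print([round(float(v), 1) for v in xyxy], round(float(conf), 2), int(cls))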
896
+ def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer()
897
+ # Strip optimizer from 'f' to finalize training, optionally save as 's'
898
+ x = torch.load(f, map_location=torch.device('cpu'))
899
+ if x.get('ema'):
900
+ x['model'] = x['ema'] # replace model with ema
901
+ for k in 'optimizer', 'best_fitness', 'wandb_id', 'ema', 'updates': # keys
902
+ x[k] = None
903
+ x['epoch'] = -1
904
+ x['model'].half() # to FP16
905
+ for p in x['model'].parameters():
906
+ p.requires_grad = False
907
+ torch.save(x, s or f)
908
+ mb = os.path.getsize(s or f) / 1E6 # filesize
909
+ LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB")
910
+
911
+
912
+ def print_mutation(results, hyp, save_dir, bucket, prefix=colorstr('evolve: ')):
913
+ evolve_csv = save_dir / 'evolve.csv'
914
+ evolve_yaml = save_dir / 'hyp_evolve.yaml'
915
+ keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss',
916
+ 'val/obj_loss', 'val/cls_loss') + tuple(hyp.keys()) # [results + hyps]
917
+ keys = tuple(x.strip() for x in keys)
918
+ vals = results + tuple(hyp.values())
919
+ n = len(keys)
920
+
921
+ # Download (optional)
922
+ if bucket:
923
+ url = f'gs://{bucket}/evolve.csv'
924
+ if gsutil_getsize(url) > (evolve_csv.stat().st_size if evolve_csv.exists() else 0):
925
+ os.system(f'gsutil cp {url} {save_dir}') # download evolve.csv if larger than local
926
+
927
+ # Log to evolve.csv
928
+ s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n') # add header
929
+ with open(evolve_csv, 'a') as f:
930
+ f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n')
931
+
932
+ # Save yaml
933
+ with open(evolve_yaml, 'w') as f:
934
+ data = pd.read_csv(evolve_csv)
935
+ data = data.rename(columns=lambda x: x.strip()) # strip keys
936
+ i = np.argmax(fitness(data.values[:, :4])) #
937
+ generations = len(data)
938
+ f.write('# YOLOv5 Hyperparameter Evolution Results\n' + f'# Best generation: {i}\n' +
939
+ f'# Last generation: {generations - 1}\n' + '# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) +
940
+ '\n' + '# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n')
941
+ yaml.safe_dump(data.loc[i][7:].to_dict(), f, sort_keys=False)
942
+
943
+ # Print to screen
944
+ LOGGER.info(prefix + f'{generations} generations finished, current result:\n' + prefix +
945
+ ', '.join(f'{x.strip():>20s}' for x in keys) + '\n' + prefix + ', '.join(f'{x:20.5g}'
946
+ for x in vals) + '\n\n')
947
+
948
+ if bucket:
949
+ os.system(f'gsutil cp {evolve_csv} {evolve_yaml} gs://{bucket}') # upload
950
+
951
+
952
+ def apply_classifier(x, model, img, im0):
953
+ # Apply a second stage classifier to YOLO outputs
954
+ # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval()
955
+ im0 = [im0] if isinstance(im0, np.ndarray) else im0
956
+ for i, d in enumerate(x): # per image
957
+ if d is not None and len(d):
958
+ d = d.clone()
959
+
960
+ # Reshape and pad cutouts
961
+ b = xyxy2xywh(d[:, :4]) # boxes
962
+ b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square
963
+ b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad
964
+ d[:, :4] = xywh2xyxy(b).long()
965
+
966
+ # Rescale boxes from img_size to im0 size
967
+ scale_coords(img.shape[2:], d[:, :4], im0[i].shape)
968
+
969
+ # Classes
970
+ pred_cls1 = d[:, 5].long()
971
+ ims = []
972
+ for a in d:
973
+ cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
974
+ im = cv2.resize(cutout, (224, 224)) # BGR
975
+
976
+ im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
977
+ im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32
978
+ im /= 255 # 0 - 255 to 0.0 - 1.0
979
+ ims.append(im)
980
+
981
+ pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction
982
+ x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections
983
+
984
+ return x
985
+
986
+
987
+ def increment_path(path, exist_ok=False, sep='', mkdir=False):
988
+ # Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
989
+ path = Path(path) # os-agnostic
990
+ if path.exists() and not exist_ok:
991
+ path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '')
992
+
993
+ # Method 1
994
+ for n in range(2, 9999):
995
+ p = f'{path}{sep}{n}{suffix}' # increment path
996
+ if not os.path.exists(p): #
997
+ break
998
+ path = Path(p)
999
+
1000
+ # Method 2 (deprecated)
1001
+ # dirs = glob.glob(f"{path}{sep}*") # similar paths
1002
+ # matches = [re.search(rf"{path.stem}{sep}(\d+)", d) for d in dirs]
1003
+ # i = [int(m.groups()[0]) for m in matches if m] # indices
1004
+ # n = max(i) + 1 if i else 2 # increment number
1005
+ # path = Path(f"{path}{sep}{n}{suffix}") # increment path
1006
+
1007
+ if mkdir:
1008
+ path.mkdir(parents=True, exist_ok=True) # make directory
1009
+
1010
+ return path
1011
+
1012
+
1013
+ # OpenCV Chinese-friendly functions ------------------------------------------------------------------------------------
1014
+ imshow_ = cv2.imshow # copy to avoid recursion errors
1015
+
1016
+
1017
+ def imread(path, flags=cv2.IMREAD_COLOR):
1018
+ return cv2.imdecode(np.fromfile(path, np.uint8), flags)
1019
+
1020
+
1021
+ def imwrite(path, im):
1022
+ try:
1023
+ cv2.imencode(Path(path).suffix, im)[1].tofile(path)
1024
+ return True
1025
+ except Exception:
1026
+ return False
1027
+
1028
+
1029
+ def imshow(path, im):
1030
+ imshow_(path.encode('unicode_escape').decode(), im)
1031
+
1032
+
1033
+ cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow # redefine
1034
+
1035
+ # Variables ------------------------------------------------------------------------------------------------------------
1036
+ NCOLS = 0 if is_docker() else shutil.get_terminal_size().columns # terminal window size for tqdm
asone/detectors/yolov5/yolov5/models/yolo.py ADDED
@@ -0,0 +1,345 @@
1
+ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
+ """
3
+ YOLO-specific modules
4
+
5
+ Usage:
6
+ $ python path/to/models/yolo.py --cfg yolov5s.yaml
7
+ """
8
+
9
+ import argparse
10
+ import contextlib
11
+ import os
12
+ import platform
13
+ import sys
14
+ from copy import deepcopy
15
+ from pathlib import Path
16
+
17
+ FILE = Path(__file__).resolve()
18
+ ROOT = FILE.parents[1] # YOLOv5 root directory
19
+ # if str(ROOT) not in sys.path:
20
+ # sys.path.append(str(ROOT)) # add ROOT to PATH
21
+ if platform.system() != 'Windows':
22
+ ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
23
+
24
+ from asone.detectors.yolov5.yolov5.models.common import *
25
+ from asone.detectors.yolov5.yolov5.models.experimental import *
26
+ from asone.detectors.yolov5.yolov5.models.general import (LOGGER, check_version,
27
+ check_yaml, make_divisible,
28
+ print_args)
29
+ from asone.detectors.yolov5.yolov5.utils.torch_utils import (
30
+ fuse_conv_and_bn,
31
+ initialize_weights,
32
+ model_info,
33
+ profile,
34
+ scale_img,
35
+ select_device,
36
+ time_sync)
37
+
38
+ try:
39
+ import thop # for FLOPs computation
40
+ except ImportError:
41
+ thop = None
42
+
43
+
44
+ class Detect(nn.Module):
45
+ stride = None # strides computed during build
46
+ onnx_dynamic = False # ONNX export parameter
47
+ export = False # export mode
48
+
49
+ def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
50
+ super().__init__()
51
+ self.nc = nc # number of classes
52
+ self.no = nc + 5 # number of outputs per anchor
53
+ self.nl = len(anchors) # number of detection layers
54
+ self.na = len(anchors[0]) // 2 # number of anchors
55
+ self.grid = [torch.zeros(1)] * self.nl # init grid
56
+ self.anchor_grid = [torch.zeros(1)] * self.nl # init anchor grid
57
+ self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
58
+ self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
59
+ self.inplace = inplace # use inplace ops (e.g. slice assignment)
60
+
61
+ def forward(self, x):
62
+ z = [] # inference output
63
+ for i in range(self.nl):
64
+ x[i] = self.m[i](x[i]) # conv
65
+ bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
66
+ x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
67
+
68
+ if not self.training: # inference
69
+ if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
70
+ self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
71
+
72
+ y = x[i].sigmoid()
73
+ if self.inplace:
74
+ y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i] # xy
75
+ y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
76
+ else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
77
+ xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0
78
+ xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
79
+ wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
80
+ y = torch.cat((xy, wh, conf), 4)
81
+ z.append(y.view(bs, -1, self.no))
82
+
83
+ return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
84
+
85
+ def _make_grid(self, nx=20, ny=20, i=0):
86
+ d = self.anchors[i].device
87
+ t = self.anchors[i].dtype
88
+ shape = 1, self.na, ny, nx, 2 # grid shape
89
+ y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
90
+ if check_version(torch.__version__, '1.10.0'): # torch>=1.10.0 meshgrid workaround for torch>=0.7 compatibility
91
+ yv, xv = torch.meshgrid(y, x, indexing='ij')
92
+ else:
93
+ yv, xv = torch.meshgrid(y, x)
94
+ grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
95
+ anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
96
+ return grid, anchor_grid
97
+
98
+
99
+ class DetectionModel(nn.Module):
100
+ # YOLOv5 model
101
+ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
102
+ super().__init__()
103
+ if isinstance(cfg, dict):
104
+ self.yaml = cfg # model dict
105
+ else: # is *.yaml
106
+ import yaml # for torch hub
107
+ self.yaml_file = Path(cfg).name
108
+ with open(cfg, encoding='ascii', errors='ignore') as f:
109
+ self.yaml = yaml.safe_load(f) # model dict
110
+
111
+ # Define model
112
+ ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
113
+ if nc and nc != self.yaml['nc']:
114
+ LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
115
+ self.yaml['nc'] = nc # override yaml value
116
+ if anchors:
117
+ LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
118
+ self.yaml['anchors'] = round(anchors) # override yaml value
119
+ self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
120
+ self.names = [str(i) for i in range(self.yaml['nc'])] # default names
121
+ self.inplace = self.yaml.get('inplace', True)
122
+
123
+ # Build strides, anchors
124
+ m = self.model[-1] # Detect()
125
+ if isinstance(m, Detect):
126
+ s = 256 # 2x min stride
127
+ m.inplace = self.inplace
128
+ m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
129
+ check_anchor_order(m) # must be in pixel-space (not grid-space)
130
+ m.anchors /= m.stride.view(-1, 1, 1)
131
+ self.stride = m.stride
132
+ self._initialize_biases() # only run once
133
+
134
+ # Init weights, biases
135
+ initialize_weights(self)
136
+ self.info()
137
+ LOGGER.info('')
138
+
139
+ def forward(self, x, augment=False, profile=False, visualize=False):
140
+ if augment:
141
+ return self._forward_augment(x) # augmented inference, None
142
+ return self._forward_once(x, profile, visualize) # single-scale inference, train
143
+
144
+ def _forward_augment(self, x):
145
+ img_size = x.shape[-2:] # height, width
146
+ s = [1, 0.83, 0.67] # scales
147
+ f = [None, 3, None] # flips (2-ud, 3-lr)
148
+ y = [] # outputs
149
+ for si, fi in zip(s, f):
150
+ xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
151
+ yi = self._forward_once(xi)[0] # forward
152
+ # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
153
+ yi = self._descale_pred(yi, fi, si, img_size)
154
+ y.append(yi)
155
+ y = self._clip_augmented(y) # clip augmented tails
156
+ return torch.cat(y, 1), None # augmented inference, train
157
+
158
+ def _forward_once(self, x, profile=False, visualize=False):
159
+ y, dt = [], [] # outputs
160
+ for m in self.model:
161
+ if m.f != -1: # if not from previous layer
162
+ x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
163
+ if profile:
164
+ self._profile_one_layer(m, x, dt)
165
+ x = m(x) # run
166
+ y.append(x if m.i in self.save else None) # save output
167
+ if visualize:
168
+ feature_visualization(x, m.type, m.i, save_dir=visualize)
169
+ return x
170
+
171
+ def _descale_pred(self, p, flips, scale, img_size):
172
+ # de-scale predictions following augmented inference (inverse operation)
173
+ if self.inplace:
174
+ p[..., :4] /= scale # de-scale
175
+ if flips == 2:
176
+ p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
177
+ elif flips == 3:
178
+ p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
179
+ else:
180
+ x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
181
+ if flips == 2:
182
+ y = img_size[0] - y # de-flip ud
183
+ elif flips == 3:
184
+ x = img_size[1] - x # de-flip lr
185
+ p = torch.cat((x, y, wh, p[..., 4:]), -1)
186
+ return p
187
+
188
+ def _clip_augmented(self, y):
189
+ # Clip YOLOv5 augmented inference tails
190
+ nl = self.model[-1].nl # number of detection layers (P3-P5)
191
+ g = sum(4 ** x for x in range(nl)) # grid points
192
+ e = 1 # exclude layer count
193
+ i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices
194
+ y[0] = y[0][:, :-i] # large
195
+ i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
196
+ y[-1] = y[-1][:, i:] # small
197
+ return y
198
+
199
+ def _profile_one_layer(self, m, x, dt):
200
+ c = isinstance(m, Detect) # is final layer, copy input as inplace fix
201
+ o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs
202
+ t = time_sync()
203
+ for _ in range(10):
204
+ m(x.copy() if c else x)
205
+ dt.append((time_sync() - t) * 100)
206
+ if m == self.model[0]:
207
+ LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
208
+ LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
209
+ if c:
210
+ LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
211
+
212
+ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
213
+ # https://arxiv.org/abs/1708.02002 section 3.3
214
+ # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
215
+ m = self.model[-1] # Detect() module
216
+ for mi, s in zip(m.m, m.stride): # from
217
+ b = mi.bias.view(m.na, -1).detach() # conv.bias(255) to (3,85)
218
+ b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
219
+ b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # cls
220
+ mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
221
+
222
+ def _print_biases(self):
223
+ m = self.model[-1] # Detect() module
224
+ for mi in m.m: # from
225
+ b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85)
226
+ LOGGER.info(
227
+ ('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
228
+
229
+ def _print_weights(self):
230
+ for m in self.model.modules():
231
+ if type(m) is Bottleneck:
232
+ LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
233
+
234
+ def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
235
+ # LOGGER.info('Fusing layers... ')
236
+ for m in self.model.modules():
237
+ if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
238
+ m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
239
+ delattr(m, 'bn') # remove batchnorm
240
+ m.forward = m.forward_fuse # update forward
241
+ # self.info()
242
+ return self
243
+
244
+ def info(self, verbose=False, img_size=640): # print model information
245
+ model_info(self, verbose, img_size)
246
+
247
+ def _apply(self, fn):
248
+ # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
249
+ self = super()._apply(fn)
250
+ m = self.model[-1] # Detect()
251
+ if isinstance(m, Detect):
252
+ m.stride = fn(m.stride)
253
+ m.grid = list(map(fn, m.grid))
254
+ if isinstance(m.anchor_grid, list):
255
+ m.anchor_grid = list(map(fn, m.anchor_grid))
256
+ return self
257
+
258
+ Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
259
+
260
+
261
+ def parse_model(d, ch): # model_dict, input_channels(3)
262
+ LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
263
+ anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
264
+ na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
265
+ no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
266
+
267
+ layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
268
+ for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
269
+ m = eval(m) if isinstance(m, str) else m # eval strings
270
+ for j, a in enumerate(args):
271
+ with contextlib.suppress(NameError):
272
+ args[j] = eval(a) if isinstance(a, str) else a # eval strings
273
+
274
+ n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
275
+ if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
276
+ BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x):
277
+ c1, c2 = ch[f], args[0]
278
+ if c2 != no: # if not output
279
+ c2 = make_divisible(c2 * gw, 8)
280
+
281
+ args = [c1, c2, *args[1:]]
282
+ if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]:
283
+ args.insert(2, n) # number of repeats
284
+ n = 1
285
+ elif m is nn.BatchNorm2d:
286
+ args = [ch[f]]
287
+ elif m is Concat:
288
+ c2 = sum(ch[x] for x in f)
289
+ elif m is Detect:
290
+ args.append([ch[x] for x in f])
291
+ if isinstance(args[1], int): # number of anchors
292
+ args[1] = [list(range(args[1] * 2))] * len(f)
293
+ elif m is Contract:
294
+ c2 = ch[f] * args[0] ** 2
295
+ elif m is Expand:
296
+ c2 = ch[f] // args[0] ** 2
297
+ else:
298
+ c2 = ch[f]
299
+
300
+ m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
301
+ t = str(m)[8:-2].replace('__main__.', '') # module type
302
+ np = sum(x.numel() for x in m_.parameters()) # number params
303
+ m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
304
+ LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print
305
+ save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
306
+ layers.append(m_)
307
+ if i == 0:
308
+ ch = []
309
+ ch.append(c2)
310
+ return nn.Sequential(*layers), sorted(save)
311
+
312
+
313
+ if __name__ == '__main__':
314
+ parser = argparse.ArgumentParser()
315
+ parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
316
+ parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
317
+ parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
318
+ parser.add_argument('--profile', action='store_true', help='profile model speed')
319
+ parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
320
+ parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
321
+ opt = parser.parse_args()
322
+ opt.cfg = check_yaml(opt.cfg) # check YAML
323
+ print_args(vars(opt))
324
+ device = select_device(opt.device)
325
+
326
+ # Create model
327
+ im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
328
+ model = Model(opt.cfg).to(device)
329
+
330
+ # Options
331
+ if opt.line_profile: # profile layer by layer
332
+ _ = model(im, profile=True)
333
+
334
+ elif opt.profile: # profile forward-backward
335
+ results = profile(input=im, ops=[model], n=3)
336
+
337
+ elif opt.test: # test all models
338
+ for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
339
+ try:
340
+ _ = Model(cfg)
341
+ except Exception as e:
342
+ print(f'Error in {cfg}: {e}')
343
+
344
+ else: # report fused model summary
345
+ model.fuse()
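
For orientation: in the inference branch above, Detect flattens each scale to (bs, na*ny*nx, no) and torch.cat(z, 1) stacks the three scales. A quick back-of-envelope check of that size for a 640x640 input, assuming the default 3 anchors per scale and strides 8/16/32 (the actual values come from the model YAML elsewhere in this commit):

num_anchors = 3                                   # assumed default anchors per scale
num_outputs = 80 + 5                              # nc + 5 (xywh, objectness)
strides = (8, 16, 32)                             # assumed P3/P4/P5 strides
cells = sum((640 // s) ** 2 for s in strides)     # 80*80 + 40*40 + 20*20 = 8400 grid cells
print(num_anchors * cells, num_outputs)           # 25200 85 -> torch.cat(z, 1) is (bs, 25200, 85)
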
asone/detectors/yolov5/yolov5/utils/__init__.py ADDED
File without changes
asone/detectors/yolov5/yolov5/utils/torch_utils.py ADDED
@@ -0,0 +1,354 @@
1
+ # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
2
+ """
3
+ PyTorch utils
4
+ """
5
+
6
+ import math
7
+ import os
8
+ import platform
9
+ import subprocess
10
+ import time
11
+ import warnings
12
+ from contextlib import contextmanager
13
+ from copy import deepcopy
14
+ from pathlib import Path
15
+
16
+ import torch
17
+ import torch.distributed as dist
18
+ import torch.nn as nn
19
+ import torch.nn.functional as F
20
+ from torch.nn.parallel import DistributedDataParallel as DDP
21
+
22
+ # from utils.general import LOGGER, check_version, colorstr, file_date, git_describe  # left disabled in this vendored copy; select_device, smart_DDP, smart_optimizer and EarlyStopping reference these names and will raise NameError if called here
23
+
24
+ LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
25
+ RANK = int(os.getenv('RANK', -1))
26
+ WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
27
+
28
+ try:
29
+ import thop # for FLOPs computation
30
+ except ImportError:
31
+ thop = None
32
+
33
+ # Suppress PyTorch warnings
34
+ warnings.filterwarnings('ignore', message='User provided device_type of \'cuda\', but CUDA is not available. Disabling')
35
+
36
+
37
+ def smart_DDP(model):
38
+ # Model DDP creation with checks
39
+ assert not check_version(torch.__version__, '1.12.0', pinned=True), \
40
+ 'torch==1.12.0 torchvision==0.13.0 DDP training is not supported due to a known issue. ' \
41
+ 'Please upgrade or downgrade torch to use DDP. See https://github.com/ultralytics/yolov5/issues/8395'
42
+ if check_version(torch.__version__, '1.11.0'):
43
+ return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True)
44
+ else:
45
+ return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
46
+
47
+
48
+ @contextmanager
49
+ def torch_distributed_zero_first(local_rank: int):
50
+ # Decorator to make all processes in distributed training wait for each local_master to do something
51
+ if local_rank not in [-1, 0]:
52
+ dist.barrier(device_ids=[local_rank])
53
+ yield
54
+ if local_rank == 0:
55
+ dist.barrier(device_ids=[0])
56
+
57
+
58
+ def device_count():
59
+ # Returns number of CUDA devices available. Safe version of torch.cuda.device_count(). Supports Linux and Windows
60
+ assert platform.system() in ('Linux', 'Windows'), 'device_count() only supported on Linux or Windows'
61
+ try:
62
+ cmd = 'nvidia-smi -L | wc -l' if platform.system() == 'Linux' else 'nvidia-smi -L | find /c /v ""' # Windows
63
+ return int(subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1])
64
+ except Exception:
65
+ return 0
66
+
67
+
68
+ def select_device(device='', batch_size=0, newline=True):
69
+ # device = None or 'cpu' or 0 or '0' or '0,1,2,3'
70
+ s = f'YOLOv5 🚀 {git_describe() or file_date()} Python-{platform.python_version()} torch-{torch.__version__} '
71
+ device = str(device).strip().lower().replace('cuda:', '').replace('none', '') # to string, 'cuda:0' to '0'
72
+ cpu = device == 'cpu'
73
+ mps = device == 'mps' # Apple Metal Performance Shaders (MPS)
74
+ if cpu or mps:
75
+ os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False
76
+ elif device: # non-cpu device requested
77
+ os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable - must be before assert is_available()
78
+ assert torch.cuda.is_available() and torch.cuda.device_count() >= len(device.replace(',', '')), \
79
+ f"Invalid CUDA '--device {device}' requested, use '--device cpu' or pass valid CUDA device(s)"
80
+
81
+ if not (cpu or mps) and torch.cuda.is_available(): # prefer GPU if available
82
+ devices = device.split(',') if device else '0' # range(torch.cuda.device_count()) # i.e. 0,1,6,7
83
+ n = len(devices) # device count
84
+ if n > 1 and batch_size > 0: # check batch_size is divisible by device_count
85
+ assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
86
+ space = ' ' * (len(s) + 1)
87
+ for i, d in enumerate(devices):
88
+ p = torch.cuda.get_device_properties(i)
89
+ s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB
90
+ arg = 'cuda:0'
91
+ elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available(): # prefer MPS if available
92
+ s += 'MPS\n'
93
+ arg = 'mps'
94
+ else: # revert to CPU
95
+ s += 'CPU\n'
96
+ arg = 'cpu'
97
+
98
+ if not newline:
99
+ s = s.rstrip()
100
+ LOGGER.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe
101
+ return torch.device(arg)
102
+
103
+
104
+ def time_sync():
105
+ # PyTorch-accurate time
106
+ if torch.cuda.is_available():
107
+ torch.cuda.synchronize()
108
+ return time.time()
109
+
110
+
111
+ def profile(input, ops, n=10, device=None):
112
+ results = []
113
+ if not isinstance(device, torch.device):
114
+ device = select_device(device)
115
+ print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
116
+ f"{'input':>24s}{'output':>24s}")
117
+
118
+ for x in input if isinstance(input, list) else [input]:
119
+ x = x.to(device)
120
+ x.requires_grad = True
121
+ for m in ops if isinstance(ops, list) else [ops]:
122
+ m = m.to(device) if hasattr(m, 'to') else m # device
123
+ m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
124
+ tf, tb, t = 0, 0, [0, 0, 0] # dt forward, backward
125
+ try:
126
+ flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPs
127
+ except Exception:
128
+ flops = 0
129
+
130
+ try:
131
+ for _ in range(n):
132
+ t[0] = time_sync()
133
+ y = m(x)
134
+ t[1] = time_sync()
135
+ try:
136
+ _ = (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
137
+ t[2] = time_sync()
138
+ except Exception: # no backward method
139
+ # print(e) # for debug
140
+ t[2] = float('nan')
141
+ tf += (t[1] - t[0]) * 1000 / n # ms per op forward
142
+ tb += (t[2] - t[1]) * 1000 / n # ms per op backward
143
+ mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0 # (GB)
144
+ s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' for x in (x, y)) # shapes
145
+ p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters
146
+ print(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}')
147
+ results.append([p, flops, mem, tf, tb, s_in, s_out])
148
+ except Exception as e:
149
+ print(e)
150
+ results.append(None)
151
+ torch.cuda.empty_cache()
152
+ return results
153
+
154
+
155
+ def is_parallel(model):
156
+ # Returns True if model is of type DP or DDP
157
+ return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
158
+
159
+
160
+ def de_parallel(model):
161
+ # De-parallelize a model: returns single-GPU model if model is of type DP or DDP
162
+ return model.module if is_parallel(model) else model
163
+
164
+
165
+ def initialize_weights(model):
166
+ for m in model.modules():
167
+ t = type(m)
168
+ if t is nn.Conv2d:
169
+ pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
170
+ elif t is nn.BatchNorm2d:
171
+ m.eps = 1e-3
172
+ m.momentum = 0.03
173
+ elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
174
+ m.inplace = True
175
+
176
+
177
+ def find_modules(model, mclass=nn.Conv2d):
178
+ # Finds layer indices matching module class 'mclass'
179
+ return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
180
+
181
+
182
+ def sparsity(model):
183
+ # Return global model sparsity
184
+ a, b = 0, 0
185
+ for p in model.parameters():
186
+ a += p.numel()
187
+ b += (p == 0).sum()
188
+ return b / a
189
+
190
+
191
+ def prune(model, amount=0.3):
192
+ # Prune model to requested global sparsity
193
+ import torch.nn.utils.prune as prune
194
+ print('Pruning model... ', end='')
195
+ for name, m in model.named_modules():
196
+ if isinstance(m, nn.Conv2d):
197
+ prune.l1_unstructured(m, name='weight', amount=amount) # prune
198
+ prune.remove(m, 'weight') # make permanent
199
+ print(' %.3g global sparsity' % sparsity(model))
200
+
201
+
202
+ def fuse_conv_and_bn(conv, bn):
203
+ # Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
204
+ fusedconv = nn.Conv2d(conv.in_channels,
205
+ conv.out_channels,
206
+ kernel_size=conv.kernel_size,
207
+ stride=conv.stride,
208
+ padding=conv.padding,
209
+ groups=conv.groups,
210
+ bias=True).requires_grad_(False).to(conv.weight.device)
211
+
212
+ # Prepare filters
213
+ w_conv = conv.weight.clone().view(conv.out_channels, -1)
214
+ w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
215
+ fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
216
+
217
+ # Prepare spatial bias
218
+ b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
219
+ b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
220
+ fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
221
+
222
+ return fusedconv
223
+
224
+
225
+ def model_info(model, verbose=False, img_size=640):
226
+ # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
227
+ n_p = sum(x.numel() for x in model.parameters()) # number parameters
228
+ n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
229
+ if verbose:
230
+ print(f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}")
231
+ for i, (name, p) in enumerate(model.named_parameters()):
232
+ name = name.replace('module_list.', '')
233
+ print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
234
+ (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
235
+
236
+ try: # FLOPs
237
+ from thop import profile
238
+ stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32
239
+ img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input
240
+ flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPs
241
+ img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float
242
+ fs = ', %.1f GFLOPs' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPs
243
+ except Exception:
244
+ fs = ''
245
+
246
+ name = Path(model.yaml_file).stem.replace('yolov5', 'YOLOv5') if hasattr(model, 'yaml_file') else 'Model'
247
+ # LOGGER.info(f"{name} summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
248
+
249
+
250
+ def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416)
251
+ # Scales img(bs,3,y,x) by ratio constrained to gs-multiple
252
+ if ratio == 1.0:
253
+ return img
254
+ h, w = img.shape[2:]
255
+ s = (int(h * ratio), int(w * ratio)) # new size
256
+ img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize
257
+ if not same_shape: # pad/crop img
258
+ h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w))
259
+ return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean
260
+
261
+
262
+ def copy_attr(a, b, include=(), exclude=()):
263
+ # Copy attributes from b to a, options to only include [...] and to exclude [...]
264
+ for k, v in b.__dict__.items():
265
+ if (len(include) and k not in include) or k.startswith('_') or k in exclude:
266
+ continue
267
+ else:
268
+ setattr(a, k, v)
269
+
270
+
271
+ def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, weight_decay=1e-5):
272
+ # YOLOv5 3-param group optimizer: 0) weights with decay, 1) weights no decay, 2) biases no decay
273
+ g = [], [], [] # optimizer parameter groups
274
+ bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. BatchNorm2d()
275
+ for v in model.modules():
276
+ if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias (no decay)
277
+ g[2].append(v.bias)
278
+ if isinstance(v, bn): # weight (no decay)
279
+ g[1].append(v.weight)
280
+ elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay)
281
+ g[0].append(v.weight)
282
+
283
+ if name == 'Adam':
284
+ optimizer = torch.optim.Adam(g[2], lr=lr, betas=(momentum, 0.999)) # adjust beta1 to momentum
285
+ elif name == 'AdamW':
286
+ optimizer = torch.optim.AdamW(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
287
+ elif name == 'RMSProp':
288
+ optimizer = torch.optim.RMSprop(g[2], lr=lr, momentum=momentum)
289
+ elif name == 'SGD':
290
+ optimizer = torch.optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
291
+ else:
292
+ raise NotImplementedError(f'Optimizer {name} not implemented.')
293
+
294
+ optimizer.add_param_group({'params': g[0], 'weight_decay': weight_decay}) # add g0 with weight_decay
295
+ optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0}) # add g1 (BatchNorm2d weights)
296
+ LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
297
+ f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias")
298
+ return optimizer
299
+
300
+
301
+ class EarlyStopping:
302
+ # YOLOv5 simple early stopper
303
+ def __init__(self, patience=30):
304
+ self.best_fitness = 0.0 # i.e. mAP
305
+ self.best_epoch = 0
306
+ self.patience = patience or float('inf') # epochs to wait after fitness stops improving to stop
307
+ self.possible_stop = False # possible stop may occur next epoch
308
+
309
+ def __call__(self, epoch, fitness):
310
+ if fitness >= self.best_fitness: # >= 0 to allow for early zero-fitness stage of training
311
+ self.best_epoch = epoch
312
+ self.best_fitness = fitness
313
+ delta = epoch - self.best_epoch # epochs without improvement
314
+ self.possible_stop = delta >= (self.patience - 1) # possible stop may occur next epoch
315
+ stop = delta >= self.patience # stop training if patience exceeded
316
+ if stop:
317
+ LOGGER.info(f'Stopping training early as no improvement observed in last {self.patience} epochs. '
318
+ f'Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n'
319
+ f'To update EarlyStopping(patience={self.patience}) pass a new patience value, '
320
+ f'i.e. `python train.py --patience 300` or use `--patience 0` to disable EarlyStopping.')
321
+ return stop
322
+
323
+
324
+ class ModelEMA:
325
+ """ Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models
326
+ Keeps a moving average of everything in the model state_dict (parameters and buffers)
327
+ For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
328
+ """
329
+
330
+ def __init__(self, model, decay=0.9999, tau=2000, updates=0):
331
+ # Create EMA
332
+ self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA
333
+ # if next(model.parameters()).device.type != 'cpu':
334
+ # self.ema.half() # FP16 EMA
335
+ self.updates = updates # number of EMA updates
336
+ self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs)
337
+ for p in self.ema.parameters():
338
+ p.requires_grad_(False)
339
+
340
+ def update(self, model):
341
+ # Update EMA parameters
342
+ with torch.no_grad():
343
+ self.updates += 1
344
+ d = self.decay(self.updates)
345
+
346
+ msd = de_parallel(model).state_dict() # model state_dict
347
+ for k, v in self.ema.state_dict().items():
348
+ if v.dtype.is_floating_point:
349
+ v *= d
350
+ v += (1 - d) * msd[k].detach()
351
+
352
+ def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
353
+ # Update EMA attributes
354
+ copy_attr(self.ema, model, include, exclude)
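
As a reference for how the EMA helper above is normally driven, a minimal training-loop sketch on a toy model (the tiny network, random data and SGD settings below are placeholders, not part of this repository):

import torch
import torch.nn as nn
from asone.detectors.yolov5.yolov5.utils.torch_utils import ModelEMA

model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.BatchNorm2d(8), nn.ReLU())
ema = ModelEMA(model)                     # FP32 copy whose weights are exponentially averaged
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for _ in range(3):                        # stand-in for real training batches
    loss = model(torch.randn(2, 3, 32, 32)).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    ema.update(model)                     # blend the current weights into ema.ema

# ema.ema is the averaged model that would normally be validated and exported
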
asone/detectors/yolov5/yolov5/utils/yolov5_utils.py ADDED
@@ -0,0 +1,222 @@
1
+ import contextlib
2
+ import time
3
+ import numpy as np
4
+ import torch
5
+ import torchvision
6
+ import cv2
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
+
12
+ def box_area(box):
13
+ # box = xyxy(4,n)
14
+ return (box[2] - box[0]) * (box[3] - box[1])
15
+
16
+
17
+ def box_iou(box1, box2, eps=1e-7):
18
+ # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
19
+ """
20
+ Return intersection-over-union (Jaccard index) of boxes.
21
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
22
+ Arguments:
23
+ box1 (Tensor[N, 4])
24
+ box2 (Tensor[M, 4])
25
+ Returns:
26
+ iou (Tensor[N, M]): the NxM matrix containing the pairwise
27
+ IoU values for every element in boxes1 and boxes2
28
+ """
29
+
30
+ # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
31
+ (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1)
32
+ inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
33
+
34
+ # IoU = inter / (area1 + area2 - inter)
35
+ return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps)
36
+
37
+ def xywh2xyxy(x):
38
+ # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
39
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
40
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
41
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
42
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
43
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
44
+ return y
45
+
46
+ def non_max_suppression(prediction,
47
+ conf_thres=0.25,
48
+ iou_thres=0.45,
49
+ classes=None,
50
+ agnostic=False,
51
+ multi_label=False,
52
+ labels=(),
53
+ max_det=300):
54
+ """Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes
55
+ Returns:
56
+ list of detections, on (n,6) tensor per image [xyxy, conf, cls]
57
+ """
58
+ # prediction = torch.Tensor(prediction)
59
+ bs = prediction.shape[0] # batch size
60
+ nc = prediction.shape[2] - 5 # number of classes
61
+ xc = prediction[..., 4] > conf_thres # candidates
62
+ # Checks
63
+ assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
64
+ assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
65
+
66
+ # Settings
67
+ # min_wh = 2 # (pixels) minimum box width and height
68
+ max_wh = 7680 # (pixels) maximum box width and height
69
+ max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
70
+ time_limit = 0.3 + 0.03 * bs # seconds to quit after
71
+ redundant = True # require redundant detections
72
+ multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
73
+ merge = False # use merge-NMS
74
+
75
+ t = time.time()
76
+ output = [torch.zeros((0, 6), device=prediction.device)] * bs
77
+ for xi, x in enumerate(prediction): # image index, image inference
78
+ # Apply constraints
79
+ # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
80
+ x = x[xc[xi]] # confidence
81
+
82
+ # Cat apriori labels if autolabelling
83
+ if labels and len(labels[xi]):
84
+ lb = labels[xi]
85
+ v = torch.zeros((len(lb), nc + 5), device=x.device)
86
+ v[:, :4] = lb[:, 1:5] # box
87
+ v[:, 4] = 1.0 # conf
88
+ v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls
89
+ x = torch.cat((x, v), 0)
90
+
91
+ # If none remain process next image
92
+ if not x.shape[0]:
93
+ continue
94
+
95
+ # Compute conf
96
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
97
+
98
+ # Box (center x, center y, width, height) to (x1, y1, x2, y2)
99
+ # print(type(x))
100
+ box = xywh2xyxy(x[:, :4])
101
+
102
+ # Detections matrix nx6 (xyxy, conf, cls)
103
+ if multi_label:
104
+ i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
105
+ x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
106
+ else: # best class only
107
+ conf, j = x[:, 5:].max(1, keepdim=True)
108
+ x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
109
+
110
+ # Filter by class
111
+ if classes is not None:
112
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
113
+
114
+ # Apply finite constraint
115
+ # if not torch.isfinite(x).all():
116
+ # x = x[torch.isfinite(x).all(1)]
117
+
118
+ # Check shape
119
+ n = x.shape[0] # number of boxes
120
+ if not n: # no boxes
121
+ continue
122
+ elif n > max_nms: # excess boxes
123
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
124
+
125
+ # Batched NMS
126
+ c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
127
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
128
+ i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
129
+ if i.shape[0] > max_det: # limit detections
130
+ i = i[:max_det]
131
+ if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
132
+ # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
133
+ iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
134
+ weights = iou * scores[None] # box weights
135
+ x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
136
+ if redundant:
137
+ i = i[iou.sum(1) > 1] # require redundancy
138
+
139
+ output[xi] = x[i]
140
+ if (time.time() - t) > time_limit:
141
+ # LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded')
142
+ break # time limit exceeded
143
+
144
+ return output
145
+
146
+ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
147
+ # Resize and pad image while meeting stride-multiple constraints
148
+ shape = im.shape[:2] # current shape [height, width]
149
+ if isinstance(new_shape, int):
150
+ new_shape = (new_shape, new_shape)
151
+
152
+ # Scale ratio (new / old)
153
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
154
+ if not scaleup: # only scale down, do not scale up (for better val mAP)
155
+ r = min(r, 1.0)
156
+
157
+ # Compute padding
158
+ ratio = r, r # width, height ratios
159
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
160
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
161
+ if auto: # minimum rectangle
162
+ dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
163
+ elif scaleFill: # stretch
164
+ dw, dh = 0.0, 0.0
165
+ new_unpad = (new_shape[1], new_shape[0])
166
+ ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
167
+
168
+ dw /= 2 # divide padding into 2 sides
169
+ dh /= 2
170
+
171
+ if shape[::-1] != new_unpad: # resize
172
+ im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
173
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
174
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
175
+ im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
176
+ return im, ratio, (dw, dh)
177
+
178
+
179
+
180
+ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
181
+ # Rescale coords (xyxy) from img1_shape to img0_shape
182
+ if ratio_pad is None: # calculate from img0_shape
183
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
184
+ pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
185
+ else:
186
+ gain = ratio_pad[0][0]
187
+ pad = ratio_pad[1]
188
+
189
+ coords[:, [0, 2]] -= pad[0] # x padding
190
+ coords[:, [1, 3]] -= pad[1] # y padding
191
+ coords[:, :4] /= gain
192
+ clip_coords(coords, img0_shape)
193
+ return coords
194
+
195
+
196
+ def clip_coords(boxes, shape):
197
+ # Clip bounding xyxy bounding boxes to image shape (height, width)
198
+ if isinstance(boxes, torch.Tensor): # faster individually
199
+ boxes[:, 0].clamp_(0, shape[1]) # x1
200
+ boxes[:, 1].clamp_(0, shape[0]) # y1
201
+ boxes[:, 2].clamp_(0, shape[1]) # x2
202
+ boxes[:, 3].clamp_(0, shape[0]) # y2
203
+ else: # np.array (faster grouped)
204
+ boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2
205
+ boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
206
+
207
+ @contextlib.contextmanager
208
+ def yolov5_in_syspath():
209
+ """
210
+ Temporarily add yolov5 folder to `sys.path`.
211
+
212
+ torch.hub handles it in the same way: https://github.com/pytorch/pytorch/blob/75024e228ca441290b6a1c2e564300ad507d7af6/torch/hub.py#L387
213
+
214
+ Proper fix for: #22, #134, #353, #1155, #1389, #1680, #2531, #3071
215
+ No need for such workarounds: #869, #1052, #2949
216
+ """
217
+ yolov5_folder_dir = str(Path(__file__).parents[1].absolute())
218
+ try:
219
+ sys.path.insert(0, yolov5_folder_dir)
220
+ yield
221
+ finally:
222
+ sys.path.remove(yolov5_folder_dir)
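
Taken together, letterbox, non_max_suppression and scale_coords form the standard YOLOv5 pre/post-processing chain used by the detector class in the next file. A self-contained sketch of that flow on dummy data (the random tensor stands in for a real model output; shapes assume an 80-class model at 640x640):

import numpy as np
import torch
from asone.detectors.yolov5.yolov5.utils.yolov5_utils import letterbox, non_max_suppression, scale_coords

frame = np.zeros((720, 1280, 3), dtype=np.uint8)              # placeholder BGR frame
padded, ratio, (dw, dh) = letterbox(frame, (640, 640), auto=False)

pred = torch.rand(1, 25200, 85)                               # fake raw output: [x, y, w, h, obj, 80 class scores]
dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)[0]   # (n, 6): xyxy, conf, cls
if len(dets):
    dets[:, :4] = scale_coords(padded.shape[:2], dets[:, :4], frame.shape).round()  # back to frame coordinates
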
asone/detectors/yolov5/yolov5_detector.py ADDED
@@ -0,0 +1,121 @@
1
+ import os
2
+ from asone.utils import get_names
3
+ import numpy as np
4
+ import warnings
5
+ import torch
6
+ import onnxruntime
7
+
8
+ from asone.detectors.yolov5.yolov5.utils.yolov5_utils import (non_max_suppression,
9
+ scale_coords,
10
+ letterbox)
11
+ from asone.detectors.yolov5.yolov5.models.experimental import attempt_load
12
+ from asone import utils
13
+
14
+ class YOLOv5Detector:
15
+ def __init__(self,
16
+ weights=None,
17
+ use_onnx=False,
18
+ use_cuda=True):
19
+
20
+ self.use_onnx = use_onnx
21
+ self.device = 'cuda' if use_cuda else 'cpu'
22
+
23
+ if not os.path.exists(weights):
24
+ utils.download_weights(weights)
25
+
26
+ # Load Model
27
+ self.model = self.load_model(use_cuda, weights)
28
+
29
+ def load_model(self, use_cuda, weights, fp16=False):
30
+ # Half precision is used only when fp16=True and inference runs on a CUDA device
31
+ self.fp16 = fp16 and self.device != 'cpu'
32
+ # Load onnx
33
+ if self.use_onnx:
34
+ if use_cuda:
35
+ providers = ['CUDAExecutionProvider','CPUExecutionProvider']
36
+ else:
37
+ providers = ['CPUExecutionProvider']
38
+ model = onnxruntime.InferenceSession(weights, providers=providers)
39
+ # Load PyTorch
40
+ else:
41
+ model = attempt_load(weights, device=self.device, inplace=True, fuse=True)
42
+ model.half() if self.fp16 else model.float()
43
+ return model
44
+
45
+ def image_preprocessing(self,
46
+ image: np.ndarray,
47
+ input_shape=(640, 640)) -> tuple:
48
+
49
+ original_image = image.copy()
50
+ image = letterbox(image, input_shape, stride=32, auto=False)[0]
51
+ image = image.transpose((2, 0, 1))[::-1]
52
+ image = np.ascontiguousarray(image, dtype=np.float32)
53
+ image /= 255 # 0 - 255 to 0.0 - 1.0
54
+ if len(image.shape) == 3:
55
+ image = image[None] # expand for batch dim
56
+ return original_image, image
57
+
58
+ def detect(self, image: np.ndarray,
59
+ input_shape: tuple = (640, 640),
60
+ conf_thres: float = 0.25,
61
+ iou_thres: float = 0.45,
62
+ max_det: int = 1000,
63
+ filter_classes: list = None,
64
+ agnostic_nms: bool = True,
65
+ with_p6: bool = False) -> tuple:
66
+
67
+ # Image Preprocessing
68
+ original_image, processed_image = self.image_preprocessing(image, input_shape)
69
+
70
+ # Inference
71
+ if self.use_onnx:
72
+ # Input names of ONNX model on which it is exported
73
+ input_name = self.model.get_inputs()[0].name
74
+ # Run onnx model
75
+ pred = self.model.run([self.model.get_outputs()[0].name], {input_name: processed_image})[0]
76
+ # Run Pytorch model
77
+ else:
78
+ processed_image = torch.from_numpy(processed_image).to(self.device)
79
+ # Change image floating point precision if fp16 set to true
80
+ processed_image = processed_image.half() if self.fp16 else processed_image.float()
81
+ pred = self.model(processed_image, augment=False, visualize=False)[0]
82
+
83
+ # Post Processing
84
+ if isinstance(pred, np.ndarray):
85
+ pred = torch.tensor(pred, device=self.device)
86
+ predictions = non_max_suppression(pred, conf_thres,
87
+ iou_thres,
88
+ agnostic=agnostic_nms,
89
+ max_det=max_det)
90
+
91
+ for i, prediction in enumerate(predictions): # per image
92
+ if len(prediction):
93
+ prediction[:, :4] = scale_coords(
94
+ processed_image.shape[2:], prediction[:, :4], original_image.shape).round()
95
+ predictions[i] = prediction
96
+ detections = predictions[0].cpu().numpy()
97
+ image_info = {
98
+ 'width': original_image.shape[1],
99
+ 'height': original_image.shape[0],
100
+ }
101
+
102
+ self.boxes = detections[:, :4]
103
+ self.scores = detections[:, 4:5]
104
+ self.class_ids = detections[:, 5:6]
105
+
106
+ if filter_classes:
107
+ class_names = get_names()
108
+
109
+ filter_class_idx = []
110
+ if filter_classes:
111
+ for _class in filter_classes:
112
+ if _class.lower() in class_names:
113
+ filter_class_idx.append(class_names.index(_class.lower()))
114
+ else:
115
+ warnings.warn(f"class {_class} not found in model classes list.")
116
+
117
+ detections = detections[np.in1d(detections[:,5].astype(int), filter_class_idx)]
118
+
119
+ return detections, image_info
120
+
121
+
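
A minimal end-to-end sketch of this detector class as it would typically be called; the weight name, image path and class filter below are illustrative and assume asone.utils can resolve or download the weights and that get_names() returns the COCO label list:

import cv2
from asone.detectors.yolov5.yolov5_detector import YOLOv5Detector

detector = YOLOv5Detector(weights='yolov5s.pt', use_onnx=False, use_cuda=False)   # weight name is a placeholder
frame = cv2.imread('sample_videos/frame.jpg')                                     # any BGR image
dets, info = detector.detect(frame, conf_thres=0.25, filter_classes=['person'])
# dets: (n, 6) array of [x1, y1, x2, y2, confidence, class_id] in original image coordinates
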
asone/detectors/yolov6/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .yolov6_detector import YOLOv6Detector
2
+ __all__ = ['YOLOv6Detector']
asone/detectors/yolov6/yolov6/__init__.py ADDED
File without changes
asone/detectors/yolov6/yolov6/assigners/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .atss_assigner import ATSSAssigner
2
+ from .tal_assigner import TaskAlignedAssigner