Spaces: Build error
Bhaskar Saranga committed
Commit 59c3a37
Parent(s): fd7f189
Added AsOne
This view is limited to 50 files because it contains too many changes. See raw diff.
- .dockerignore +9 -0
- Dockerfile +36 -0
- app.py +44 -6
- asone/__init__.py +114 -0
- asone/asone.py +195 -0
- asone/demo_detector.py +85 -0
- asone/demo_tracker.py +101 -0
- asone/detectors/__init__.py +13 -0
- asone/detectors/detector.py +92 -0
- asone/detectors/utils/__init__.py +0 -0
- asone/detectors/utils/cfg_path.py +18 -0
- asone/detectors/utils/exp_name.py +32 -0
- asone/detectors/utils/weights_path.py +117 -0
- asone/detectors/yolor/__init__.py +2 -0
- asone/detectors/yolor/cfg/yolor_csp.cfg +1376 -0
- asone/detectors/yolor/cfg/yolor_csp_x.cfg +1576 -0
- asone/detectors/yolor/cfg/yolor_p6.cfg +1760 -0
- asone/detectors/yolor/models/__init__.py +1 -0
- asone/detectors/yolor/models/common.py +1023 -0
- asone/detectors/yolor/models/export.py +68 -0
- asone/detectors/yolor/models/models.py +761 -0
- asone/detectors/yolor/utils/__init__.py +1 -0
- asone/detectors/yolor/utils/activations.py +72 -0
- asone/detectors/yolor/utils/autoanchor.py +152 -0
- asone/detectors/yolor/utils/datasets.py +1297 -0
- asone/detectors/yolor/utils/export.py +80 -0
- asone/detectors/yolor/utils/general.py +449 -0
- asone/detectors/yolor/utils/google_utils.py +132 -0
- asone/detectors/yolor/utils/layers.py +532 -0
- asone/detectors/yolor/utils/loss.py +173 -0
- asone/detectors/yolor/utils/metrics.py +140 -0
- asone/detectors/yolor/utils/parse_config.py +71 -0
- asone/detectors/yolor/utils/plots.py +380 -0
- asone/detectors/yolor/utils/torch_utils.py +240 -0
- asone/detectors/yolor/utils/yolor_utils.py +206 -0
- asone/detectors/yolor/yolor_detector.py +138 -0
- asone/detectors/yolov5/__init__.py +2 -0
- asone/detectors/yolov5/yolov5/__init__.py +0 -0
- asone/detectors/yolov5/yolov5/models/__init__.py +3 -0
- asone/detectors/yolov5/yolov5/models/common.py +756 -0
- asone/detectors/yolov5/yolov5/models/experimental.py +56 -0
- asone/detectors/yolov5/yolov5/models/general.py +1036 -0
- asone/detectors/yolov5/yolov5/models/yolo.py +345 -0
- asone/detectors/yolov5/yolov5/utils/__init__.py +0 -0
- asone/detectors/yolov5/yolov5/utils/torch_utils.py +354 -0
- asone/detectors/yolov5/yolov5/utils/yolov5_utils.py +222 -0
- asone/detectors/yolov5/yolov5_detector.py +121 -0
- asone/detectors/yolov6/__init__.py +2 -0
- asone/detectors/yolov6/yolov6/__init__.py +0 -0
- asone/detectors/yolov6/yolov6/assigners/__init__.py +2 -0
.dockerignore
ADDED
@@ -0,0 +1,9 @@
+.env/
+results/
+**__pycache__**
+*.onnx
+*.pt
+**byte_track_results**
+**deep_sort_results**
+**nor_fair_results**
+test_env/
Dockerfile
ADDED
@@ -0,0 +1,36 @@
+FROM pytorch/pytorch:latest
+
+# Set Time Zone to prevent issues for installing some apt packages
+ENV TZ=Europe/Minsk
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+# install apt packages
+RUN apt-get update -y
+RUN apt-get install git gcc \
+    g++ python3-opencv \
+    vim -y
+
+RUN mkdir /app
+WORKDIR /app
+
+ADD asone asone
+
+ADD sample_videos sample_videos
+ADD main.py main.py
+# ADD demo.py demo.py
+
+ADD setup.py setup.py
+ADD requirements.txt requirements.txt
+
+
+RUN pip3 install Cython numpy
+RUN pip3 install cython-bbox
+ADD pypi_README.md pypi_README.md
+
+RUN pip3 install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
+RUN pip3 install .
+
+
+WORKDIR /workspace
+# Entry Point
+CMD /bin/bash
app.py
CHANGED
@@ -1,3 +1,4 @@
+import os
 import torch
 import gradio as gr
 import cv2
@@ -11,6 +12,8 @@ from utils.plots import plot_one_box
 from utils.torch_utils import time_synchronized
 import time
 from ultralytics import YOLO
+import asone
+from asone import ASOne
 
 def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
     # Resize and pad image while meeting stride-multiple constraints
@@ -173,7 +176,31 @@ def inference2(video,model_link,iou_threshold,confidence_threshold):
    finalVideo.release()
    return 'output.mp4',np.mean(fps_video)
 
-
+def inference3(video,model_link,iou_threshold,confidence_threshold):
+    model_path = 'weights/'+str(model_link)+'.pt'
+    device = torch.cuda.is_available()
+    dt_obj = ASOne(
+        tracker=asone.BYTETRACK,
+        detector=asone.YOLOV8M_PYTORCH,
+        weights=model_path,
+        use_cuda=device
+    )
+    track_fn = dt_obj.track_video(video,
+                                  conf_thres=confidence_threshold,
+                                  iou_thres=iou_threshold,
+                                  display=False,
+                                  draw_trails=None,
+                                  filter_classes=None,
+                                  class_names=None) # class_names=['License Plate'] for custom weights
+    fps_a=[]
+    for bbox_details, frame_details in track_fn:
+        #bbox_xyxy, ids, scores, class_ids = bbox_details
+        frame, frame_num, fps = frame_details
+        fps_a.append(fps)
+        print(frame_num)
+
+    file_name=os.path.basename(video)
+    return f'data/results/{file_name}', np.mean(fps_a)
 
 examples_images = ['data/images/1.jpg',
                    'data/images/2.jpg',
@@ -206,11 +233,19 @@ with gr.Blocks() as demo:
            video_iou_threshold = gr.Slider(label="IOU Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.45)
            video_conf_threshold = gr.Slider(label="Confidence Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.25)
        gr.Examples(examples=examples_videos,inputs=video_input,outputs=video_output)
-        video_button = gr.Button("Detect")
-
-
-
-
+        video_button = gr.Button("Detect")
+    with gr.Tab("Tracking"):
+        gr.Markdown("## Multi object tracking")
+
+        with gr.Row():
+            track_input = gr.Video(type='pil', label="Input Video", source="upload")
+            track_output = gr.Video(type="pil", label="Output Video",format="mp4")
+            track_fps_video = gr.Number(0,label='FPS')
+            track_drop = gr.Dropdown(choices=models,value=models[0])
+            track_iou_threshold = gr.Slider(label="IOU Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.45)
+            track_conf_threshold = gr.Slider(label="Confidence Threshold",interactive=True, minimum=0.0, maximum=1.0, value=0.25)
+        gr.Examples(examples=examples_videos,inputs=track_input,outputs=track_output)
+        track_button = gr.Button("Detect")
 
    text_button.click(inference, inputs=[image_input,image_drop,
                                         image_iou_threshold,image_conf_threshold],
@@ -218,5 +253,8 @@ with gr.Blocks() as demo:
    video_button.click(inference2, inputs=[video_input,video_drop,
                                           video_iou_threshold,video_conf_threshold],
                       outputs=[video_output,fps_video])
+    track_button.click(inference3, inputs=[track_input,track_drop,
+                                           track_iou_threshold,track_conf_threshold],
+                       outputs=[track_output,track_fps_video])
 
 demo.launch(debug=True,enable_queue=True)
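Editor's note on the new inference3 handler: ASOne.track_video is a generator, so the handler iterates it for its side effects (the tracked video apparently lands in data/results/<basename>, judging by the returned path) and averages the per-frame FPS. Below is a minimal sketch of the same pattern outside Gradio; the flag choice, use_cuda=False, and the sample path 'sample_videos/test.mp4' are assumptions for illustration, not part of this commit.

import asone
from asone import ASOne

# Build the detector+tracker pair the same way inference3 does, but on CPU.
dt_obj = ASOne(detector=asone.YOLOV8M_PYTORCH, tracker=asone.BYTETRACK, use_cuda=False)

fps_a = []
for bbox_details, frame_details in dt_obj.track_video('sample_videos/test.mp4',
                                                      conf_thres=0.25,
                                                      iou_thres=0.45,
                                                      display=False):
    bbox_xyxy, ids, scores, class_ids = bbox_details   # per-frame track output
    frame, frame_num, fps = frame_details               # annotated frame, index, timing
    fps_a.append(fps)
print('mean FPS:', sum(fps_a) / max(len(fps_a), 1))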
asone/__init__.py
ADDED
@@ -0,0 +1,114 @@
+from .asone import ASOne
+import asone.detectors
+import asone.trackers
+
+
+BYTETRACK = 0
+DEEPSORT = 1
+NORFAIR = 2
+
+
+YOLOV5X6_PYTORCH = 0
+YOLOV5S_PYTORCH = 2
+YOLOV5N_PYTORCH = 4
+YOLOV5M_PYTORCH = 6
+YOLOV5L_PYTORCH = 8
+YOLOV5X_PYTORCH = 10
+YOLOV5N6_PYTORCH = 12
+YOLOV5S6_PYTORCH = 14
+YOLOV5M6_PYTORCH = 16
+YOLOV5L6_PYTORCH = 18
+
+
+YOLOV6N_PYTORCH = 20
+YOLOV6T_PYTORCH = 22
+YOLOV6S_PYTORCH = 24
+YOLOV6M_PYTORCH = 26
+YOLOV6L_PYTORCH = 28
+YOLOV6L_RELU_PYTORCH = 30
+YOLOV6S_REPOPT_PYTORCH = 32
+
+YOLOV7_TINY_PYTORCH = 34
+YOLOV7_PYTORCH = 36
+YOLOV7_X_PYTORCH = 38
+YOLOV7_W6_PYTORCH = 40
+YOLOV7_E6_PYTORCH = 42
+YOLOV7_D6_PYTORCH = 44
+YOLOV7_E6E_PYTORCH = 46
+
+YOLOR_CSP_X_PYTORCH = 48
+YOLOR_CSP_X_STAR_PYTORCH = 50
+YOLOR_CSP_STAR_PYTORCH = 52
+YOLOR_CSP_PYTORCH = 54
+YOLOR_P6_PYTORCH = 56
+
+
+
+
+YOLOX_L_PYTORCH = 58
+YOLOX_NANO_PYTORCH = 60
+YOLOX_TINY_PYTORCH = 62
+YOLOX_DARKNET_PYTORCH = 64
+YOLOX_S_PYTORCH = 66
+YOLOX_M_PYTORCH = 68
+YOLOX_X_PYTORCH = 70
+
+#ONNX
+
+YOLOV5X6_ONNX = 1
+YOLOV5S_ONNX = 3
+YOLOV5N_ONNX = 5
+YOLOV5M_ONNX = 7
+YOLOV5L_ONNX = 9
+YOLOV5X_ONNX = 11
+YOLOV5N6_ONNX = 13
+YOLOV5S6_ONNX = 15
+YOLOV5M6_ONNX = 17
+YOLOV5L6_ONNX = 19
+
+
+YOLOV6N_ONNX = 21
+YOLOV6T_ONNX = 23
+YOLOV6S_ONNX = 25
+YOLOV6M_ONNX = 27
+YOLOV6L_ONNX = 29
+YOLOV6L_RELU_ONNX = 31
+YOLOV6S_REPOPT_ONNX = 33
+
+YOLOV7_TINY_ONNX = 35
+YOLOV7_ONNX = 37
+YOLOV7_X_ONNX = 39
+YOLOV7_W6_ONNX = 41
+YOLOV7_E6_ONNX = 43
+YOLOV7_D6_ONNX = 45
+YOLOV7_E6E_ONNX = 47
+
+YOLOR_CSP_X_ONNX = 49
+YOLOR_CSP_X_STAR_ONNX = 51
+YOLOR_CSP_STAR_ONNX = 53
+YOLOR_CSP_ONNX = 55
+YOLOR_P6_ONNX = 57
+
+
+YOLOX_L_ONNX = 59
+YOLOX_NANO_ONNX = 61
+YOLOX_TINY_ONNX = 63
+YOLOX_DARKNET_ONNX = 65
+YOLOX_S_ONNX = 67
+YOLOX_M_ONNX = 69
+YOLOX_X_ONNX = 71
+
+# YOLOv8
+YOLOV8N_PYTORCH = 72
+YOLOV8N_ONNX = 73
+YOLOV8S_PYTORCH = 74
+YOLOV8S_ONNX = 75
+YOLOV8M_PYTORCH = 76
+YOLOV8M_ONNX = 77
+YOLOV8L_PYTORCH = 78
+YOLOV8L_ONNX = 79
+YOLOV8X_PYTORCH = 80
+YOLOV8X_ONNX = 81
+
+
+__all__ = ['ASOne', 'detectors', 'trackers']
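Editor's note: these integer flags encode two conventions that the rest of the package relies on. Each model family occupies a contiguous range (YOLOv5 0-19, YOLOv6 20-33, YOLOv7 34-47, YOLOR 48-57, YOLOX 58-71, YOLOv8 72-81), and within every family even values are PyTorch weights while odd values are the matching ONNX exports. A small sanity-check sketch, purely an editor's illustration:

import asone

# Even/odd pairing of PyTorch and ONNX flags.
assert asone.YOLOV5S_ONNX == asone.YOLOV5S_PYTORCH + 1
assert asone.YOLOV7_PYTORCH % 2 == 0 and asone.YOLOV7_ONNX % 2 == 1
# Family ranges used later by Detector._select_detector and get_weight_path.
assert asone.YOLOR_P6_PYTORCH in range(48, 58)
assert asone.YOLOV8X_ONNX in range(72, 82)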
asone/asone.py
ADDED
@@ -0,0 +1,195 @@
+import copy
+import cv2
+from loguru import logger
+import os
+import time
+import asone.utils as utils
+from asone.trackers import Tracker
+from asone.detectors import Detector
+from asone.utils.default_cfg import config
+import numpy as np
+
+class ASOne:
+    def __init__(self,
+                 detector: int = 0,
+                 tracker: int = -1,
+                 weights: str = None,
+                 use_cuda: bool = True) -> None:
+
+        self.use_cuda = use_cuda
+
+        # get detector object
+        self.detector = self.get_detector(detector, weights)
+
+        if tracker == -1:
+            self.tracker = None
+            return
+
+        self.tracker = self.get_tracker(tracker)
+
+    def get_detector(self, detector: int, weights: str):
+        detector = Detector(detector, weights=weights,
+                            use_cuda=self.use_cuda).get_detector()
+        return detector
+
+    def get_tracker(self, tracker: int):
+
+        tracker = Tracker(tracker, self.detector,
+                          use_cuda=self.use_cuda)
+        return tracker
+
+    def _update_args(self, kwargs):
+        for key, value in kwargs.items():
+            if key in config.keys():
+                config[key] = value
+            else:
+                print(f'"{key}" argument not found! valid args: {list(config.keys())}')
+                exit()
+        return config
+
+    def track_stream(self,
+                     stream_url,
+                     **kwargs
+                     ):
+
+        output_filename = 'result.mp4'
+        kwargs['filename'] = output_filename
+        config = self._update_args(kwargs)
+
+        for (bbox_details, frame_details) in self._start_tracking(stream_url, config):
+            # yield bbox_details, frame_details to main script
+            yield bbox_details, frame_details
+
+    def track_video(self,
+                    video_path,
+                    **kwargs
+                    ):
+        output_filename = os.path.basename(video_path)
+        kwargs['filename'] = output_filename
+        config = self._update_args(kwargs)
+
+        for (bbox_details, frame_details) in self._start_tracking(video_path, config):
+            # yield bbox_details, frame_details to main script
+            yield bbox_details, frame_details
+
+    def detect(self, source, **kwargs) -> np.ndarray:
+        """ Function to perform detection on an image
+
+        Args:
+            source (_type_): if str, read the image; if nd.array, pass it directly to detect
+
+        Returns:
+            _type_: ndarray of detections
+        """
+        if isinstance(source, str):
+            source = cv2.imread(source)
+        return self.detector.detect(source, **kwargs)
+
+    def track_webcam(self,
+                     cam_id=0,
+                     **kwargs):
+        output_filename = 'results.mp4'
+
+        kwargs['filename'] = output_filename
+        kwargs['fps'] = 29
+        config = self._update_args(kwargs)
+
+        for (bbox_details, frame_details) in self._start_tracking(cam_id, config):
+            # yield bbox_details, frame_details to main script
+            yield bbox_details, frame_details
+
+    def _start_tracking(self,
+                        stream_path: str,
+                        config: dict) -> tuple:
+        if not self.tracker:
+            print(f'No tracker is selected. Use the detect() function to perform detection or pass a tracker.')
+            exit()
+
+        fps = config.pop('fps')
+        output_dir = config.pop('output_dir')
+        filename = config.pop('filename')
+        save_result = config.pop('save_result')
+        display = config.pop('display')
+        draw_trails = config.pop('draw_trails')
+        class_names = config.pop('class_names')
+
+        cap = cv2.VideoCapture(stream_path)
+        width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
+        height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
+
+        if fps is None:
+            fps = cap.get(cv2.CAP_PROP_FPS)
+
+        if save_result:
+            os.makedirs(output_dir, exist_ok=True)
+            save_path = os.path.join(output_dir, filename)
+            logger.info(f"video save path is {save_path}")
+
+            video_writer = cv2.VideoWriter(
+                save_path,
+                cv2.VideoWriter_fourcc(*"mp4v"),
+                fps,
+                (int(width), int(height)),
+            )
+
+        frame_id = 1
+        tic = time.time()
+
+        prevTime = 0
+
+        while True:
+            start_time = time.time()
+
+            ret, frame = cap.read()
+            if not ret:
+                break
+            im0 = copy.deepcopy(frame)
+
+            bboxes_xyxy, ids, scores, class_ids = self.tracker.detect_and_track(
+                frame, config)
+            elapsed_time = time.time() - start_time
+
+            logger.info(
+                'frame {}/{} ({:.2f} ms)'.format(frame_id, int(frame_count),
+                                                 elapsed_time * 1000))
+
+            im0 = utils.draw_boxes(im0,
+                                   bboxes_xyxy,
+                                   class_ids,
+                                   identities=ids,
+                                   draw_trails=draw_trails,
+                                   class_names=class_names)
+
+            currTime = time.time()
+            fps = 1 / (currTime - prevTime)
+            prevTime = currTime
+            cv2.line(im0, (20, 25), (127, 25), [85, 45, 255], 30)
+            cv2.putText(im0, f'FPS: {int(fps)}', (11, 35), 0, 1, [
+                        225, 255, 255], thickness=2, lineType=cv2.LINE_AA)
+
+            if display:
+                cv2.imshow(' Sample', im0)
+            if save_result:
+                video_writer.write(im0)
+
+            frame_id += 1
+
+            if cv2.waitKey(25) & 0xFF == ord('q'):
+                break
+
+            # yield required values in form of (bbox_details, frame_details)
+            yield (bboxes_xyxy, ids, scores, class_ids), (im0 if display else frame, frame_id-1, fps)
+
+        tac = time.time()
+        print(f'Total Time Taken: {tac - tic:.2f}')
+
+if __name__ == '__main__':
+    # asone = ASOne(tracker='norfair')
+    asone = ASOne()
+
+    asone.start_tracking('data/sample_videos/video2.mp4',
+                         save_result=True, display=False)
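Editor's note: track_video, track_stream and track_webcam are thin wrappers that inject an output filename into the shared config and then delegate to _start_tracking, which yields (bbox_details, frame_details) per frame; with no tracker requested (tracker=-1, the default) only detect() is usable. A minimal detection-only sketch follows; the flag choice, use_cuda=False and the image path are illustrative assumptions, not part of the commit:

import asone
from asone import ASOne

model = ASOne(detector=asone.YOLOV7_PYTORCH, use_cuda=False)  # tracker defaults to -1 (none)
dets, img_info = model.detect('data/sample_imgs/test.jpg', conf_thres=0.25, iou_thres=0.45)
# As in demo_detector.py: columns 0-3 are xyxy boxes, column 4 scores, column 5 class ids.
print(dets[:, :4])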
asone/demo_detector.py
ADDED
@@ -0,0 +1,85 @@
+import asone
+from asone import ASOne
+from .utils import draw_boxes
+import cv2
+import argparse
+import time
+import os
+
+def main(args):
+    filter_classes = args.filter_classes
+    video_path = args.video
+
+    os.makedirs(args.output_path, exist_ok=True)
+
+    if filter_classes:
+        filter_classes = filter_classes.split(',')
+
+
+    detector = ASOne(asone.YOLOV7_PYTORCH, weights=args.weights, use_cuda=args.use_cuda)
+
+    cap = cv2.VideoCapture(video_path)
+    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
+    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+    FPS = cap.get(cv2.CAP_PROP_FPS)
+
+    if args.save:
+        video_writer = cv2.VideoWriter(
+            os.path.basename(video_path),
+            cv2.VideoWriter_fourcc(*"mp4v"),
+            FPS,
+            (int(width), int(height)),
+        )
+
+    frame_no = 1
+    tic = time.time()
+
+    prevTime = 0
+
+    while True:
+        start_time = time.time()
+
+        ret, img = cap.read()
+        if not ret:
+            break
+        frame = img.copy()
+
+        dets, img_info = detector.detect(img, conf_thres=0.25, iou_thres=0.45)
+        currTime = time.time()
+        fps = 1 / (currTime - prevTime)
+        prevTime = currTime
+
+        if dets is not None:
+            bbox_xyxy = dets[:, :4]
+            scores = dets[:, 4]
+            class_ids = dets[:, 5]
+            img = draw_boxes(img, bbox_xyxy, class_ids=class_ids)
+
+        cv2.line(img, (20, 25), (127, 25), [85, 45, 255], 30)
+        cv2.putText(img, f'FPS: {int(fps)}', (11, 35), 0, 1, [
+                    225, 255, 255], thickness=2, lineType=cv2.LINE_AA)
+
+
+        frame_no += 1
+        if args.display:
+            cv2.imshow('Window', img)
+
+        if args.save:
+            video_writer.write(img)
+
+        if cv2.waitKey(25) & 0xFF == ord('q'):
+            break
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("video", help="Path of video")
+    parser.add_argument('--cpu', default=True, action='store_false', dest='use_cuda', help='If provided the model will run on cpu otherwise it will run on gpu')
+    parser.add_argument('--filter_classes', default=None, help='Class names separated by comma (,). e.g. person,car ')
+    parser.add_argument('-w', '--weights', default=None, help='Path of trained weights')
+    parser.add_argument('-o', '--output_path', default='data/results', help='path of output file')
+    parser.add_argument('--no_display', action='store_false', default=True, dest='display', help='if provided video will not be displayed')
+    parser.add_argument('--no_save', action='store_false', default=True, dest='save', help='if provided video will not be saved')
+
+    args = parser.parse_args()
+    main(args)
asone/demo_tracker.py
ADDED
@@ -0,0 +1,101 @@
+import argparse
+from .trackers import Tracker
+import argparse
+import asone
+from .utils import draw_boxes
+from .detectors import Detector
+import cv2
+import os
+from loguru import logger
+import time
+import copy
+
+def main(args):
+    filter_classes = args.filter_classes
+
+    if filter_classes:
+        filter_classes = filter_classes.split(',')
+
+    detector = Detector(asone.YOLOV7_E6_ONNX, weights=args.weights, use_cuda=args.use_cuda).get_detector()
+    tracker = Tracker(asone.BYTETRACK, detector, use_cuda=args.use_cuda).get_tracker()
+
+    cap = cv2.VideoCapture(args.video_path)
+    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
+    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
+    output_dir = 'data/results'
+    if args.save_results:
+        os.makedirs(output_dir, exist_ok=True)
+        save_path = os.path.join(output_dir, os.path.basename(args.video_path))
+        logger.info(f"video save path is {save_path}")
+
+        video_writer = cv2.VideoWriter(
+            save_path,
+            cv2.VideoWriter_fourcc(*"mp4v"),
+            fps,
+            (int(width), int(height)),
+        )
+
+    frame_id = 1
+    tic = time.time()
+
+    prevTime = 0
+
+    while True:
+        start_time = time.time()
+
+        ret, frame = cap.read()
+        if not ret:
+            break
+        im0 = copy.deepcopy(frame)
+
+        bboxes_xyxy, ids, scores, class_ids = tracker.detect_and_track(
+            frame, filter_classes=filter_classes)
+
+        elapsed_time = time.time() - start_time
+
+        logger.info(
+            f'frame {frame_id}/{int(frame_count)} {elapsed_time * 1000:.2f} ms')
+
+        im0 = draw_boxes(im0, bboxes_xyxy, class_ids, identities=ids)
+
+        currTime = time.time()
+        fps = 1 / (currTime - prevTime)
+        prevTime = currTime
+        cv2.line(im0, (20, 25), (127, 25), [85, 45, 255], 30)
+        cv2.putText(im0, f'FPS: {int(fps)}', (11, 35), 0, 1, [
+                    225, 255, 255], thickness=2, lineType=cv2.LINE_AA)
+
+        if args.display:
+            cv2.imshow(' Sample', im0)
+        if args.save_results:
+            video_writer.write(im0)
+
+        frame_id += 1
+
+        if cv2.waitKey(25) & 0xFF == ord('q'):
+            break
+
+    tac = time.time()
+    print(f'Total Time Taken: {tac - tic:.2f}')
+
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('video_path', help='Path to input video')
+    parser.add_argument('--cpu', default=True,
+                        action='store_false', dest='use_cuda', help='run on cpu')
+    parser.add_argument('--no_display', default=True,
+                        action='store_false', dest='display', help='Disable display')
+    parser.add_argument('--no_save', default=True,
+                        action='store_false', dest='save_results', help='Disable result saving')
+
+    parser.add_argument('--filter_classes', default=None, help='Class names separated by comma (,). e.g. person,car ')
+    parser.add_argument('-w', '--weights', default=None, help='Path of trained weights')
+
+    args = parser.parse_args()
+
+    main(args)
asone/detectors/__init__.py
ADDED
@@ -0,0 +1,13 @@
+from asone.detectors.yolov5 import YOLOv5Detector
+from asone.detectors.yolov6 import YOLOv6Detector
+from asone.detectors.yolov7 import YOLOv7Detector
+from asone.detectors.yolor import YOLOrDetector
+from asone.detectors.yolox import YOLOxDetector
+
+from asone.detectors.detector import Detector
+__all__ = ['Detector',
+           'YOLOv5Detector',
+           'YOLOv6Detector',
+           'YOLOv7Detector',
+           'YOLOrDetector',
+           'YOLOxDetector']
asone/detectors/detector.py
ADDED
@@ -0,0 +1,92 @@
+import cv2
+
+from asone.detectors.yolov5 import YOLOv5Detector
+from asone.detectors.yolov6 import YOLOv6Detector
+from asone.detectors.yolov7 import YOLOv7Detector
+from asone.detectors.yolor import YOLOrDetector
+from asone.detectors.yolox import YOLOxDetector
+
+from asone.detectors.utils.weights_path import get_weight_path
+from asone.detectors.utils.cfg_path import get_cfg_path
+from asone.detectors.utils.exp_name import get_exp__name
+from .yolov8 import YOLOv8Detector
+
+
+class Detector:
+    def __init__(self,
+                 model_flag: int,
+                 weights: str = None,
+                 use_cuda: bool = True):
+
+        self.model = self._select_detector(model_flag, weights, use_cuda)
+
+    def _select_detector(self, model_flag, weights, cuda):
+        # Get required weight using model_flag
+        if weights and weights.split('.')[-1] == 'onnx':
+            onnx = True
+            weight = weights
+        elif weights:
+            onnx = False
+            weight = weights
+        else:
+            onnx, weight = get_weight_path(model_flag)
+
+        if model_flag in range(0, 20):
+            _detector = YOLOv5Detector(weights=weight,
+                                       use_onnx=onnx,
+                                       use_cuda=cuda)
+        elif model_flag in range(20, 34):
+            _detector = YOLOv6Detector(weights=weight,
+                                       use_onnx=onnx,
+                                       use_cuda=cuda)
+        elif model_flag in range(34, 48):
+            _detector = YOLOv7Detector(weights=weight,
+                                       use_onnx=onnx,
+                                       use_cuda=cuda)
+        elif model_flag in range(48, 58):
+            # Get Configuration file for Yolor
+            if model_flag in range(48, 57, 2):
+                cfg = get_cfg_path(model_flag)
+            else:
+                cfg = None
+            _detector = YOLOrDetector(weights=weight,
+                                      cfg=cfg,
+                                      use_onnx=onnx,
+                                      use_cuda=cuda)
+
+        elif model_flag in range(58, 72):
+            # Get exp file and corresponding model for pytorch only
+            if model_flag in range(58, 71, 2):
+                exp, model_name = get_exp__name(model_flag)
+            else:
+                exp = model_name = None
+            _detector = YOLOxDetector(model_name=model_name,
+                                      exp_file=exp,
+                                      weights=weight,
+                                      use_onnx=onnx,
+                                      use_cuda=cuda)
+        elif model_flag in range(72, 82):
+            # Get exp file and corresponding model for pytorch only
+            _detector = YOLOv8Detector(weights=weight,
+                                       use_onnx=onnx,
+                                       use_cuda=cuda)
+
+        return _detector
+
+    def get_detector(self):
+        return self.model
+
+    def detect(self,
+               image: list,
+               **kwargs: dict):
+        return self.model.detect(image, **kwargs)
+
+
+if __name__ == '__main__':
+
+    # Initialize object detector (model flag 56 = YOLOR P6, PyTorch)
+    model_type = 56
+    result = Detector(model_flag=model_type, use_cuda=True)
+    img = cv2.imread('asone/asone-linux/test.jpeg')
+    pred = result.detect(img)
+    print(pred)
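Editor's note: the flag ranges dispatched on in _select_detector line up with the constant blocks defined in asone/__init__.py, and get_detector() hands back the concrete model whose detect() the demos and ASOne call. A short usage sketch; the flag, image path and thresholds are illustrative assumptions:

import cv2
import asone
from asone.detectors import Detector

detector = Detector(asone.YOLOV5S_PYTORCH, use_cuda=False)  # falls in range(0, 20) -> YOLOv5Detector
dets, img_info = detector.detect(cv2.imread('test.jpeg'), conf_thres=0.25, iou_thres=0.45)
print(dets)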
asone/detectors/utils/__init__.py
ADDED
File without changes
asone/detectors/utils/cfg_path.py
ADDED
@@ -0,0 +1,18 @@
+import os
+
+cfg_dir = os.path.dirname(os.path.dirname(__file__))
+
+configuration = {'0': os.path.join(cfg_dir, 'yolor','cfg','yolor_csp_x.cfg'),
+                 '1': os.path.join(cfg_dir, 'yolor','cfg','yolor_csp.cfg'),
+                 '2': os.path.join(cfg_dir, 'yolor','cfg','yolor_p6.cfg')}
+
+def get_cfg_path(model_flag):
+    if model_flag in [48, 50]:
+        cfg = configuration['0']
+    if model_flag in [52, 54]:
+        cfg = configuration['1']
+    if model_flag == 56:
+        cfg = configuration['2']
+    return cfg
+
+
asone/detectors/utils/exp_name.py
ADDED
@@ -0,0 +1,32 @@
+import os
+
+exp_dir = os.path.dirname(os.path.dirname(__file__))
+
+exp_file_name = {'58': (os.path.join(exp_dir, 'yolox','exps','yolox_l.py'),'yolox-l'),
+                 '60': (os.path.join(exp_dir, 'yolox','exps','yolox_nano.py'),'yolox-nano'),
+                 '62': (os.path.join(exp_dir, 'yolox','exps','yolox_tiny.py'),'yolox-tiny'),
+                 '64': (os.path.join(exp_dir, 'yolox','exps','yolov3.py'),'yolox-darknet'),
+                 '66': (os.path.join(exp_dir, 'yolox','exps','yolox_s.py'),'yolox-s'),
+                 '68': (os.path.join(exp_dir, 'yolox','exps','yolox_m.py'),'yolox-m'),
+                 '70': (os.path.join(exp_dir, 'yolox','exps','yolox_x.py'),'yolox-x')
+                 }
+
+
+def get_exp__name(model_flag):
+
+    if model_flag == 58:
+        exp, model_name = exp_file_name['58'][0], exp_file_name['58'][1]
+    elif model_flag == 60:
+        exp, model_name = exp_file_name['60'][0], exp_file_name['60'][1]
+    elif model_flag == 62:
+        exp, model_name = exp_file_name['62'][0], exp_file_name['62'][1]
+    elif model_flag == 64:
+        exp, model_name = exp_file_name['64'][0], exp_file_name['64'][1]
+    elif model_flag == 66:
+        exp, model_name = exp_file_name['66'][0], exp_file_name['66'][1]
+    elif model_flag == 68:
+        exp, model_name = exp_file_name['68'][0], exp_file_name['68'][1]
+    elif model_flag == 70:
+        exp, model_name = exp_file_name['70'][0], exp_file_name['70'][1]
+
+    return exp, model_name
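Editor's note: the if/elif chain in get_exp__name reproduces a plain dictionary lookup keyed by the stringified flag. An equivalent, shorter form would be the following hypothetical refactor (not part of the commit; behaviour matches only for the even YOLOX flags 58-70 handled above):

def get_exp_name(model_flag):
    # Direct lookup in the exp_file_name table defined above.
    exp, model_name = exp_file_name[str(model_flag)]
    return exp, model_name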
asone/detectors/utils/weights_path.py
ADDED
@@ -0,0 +1,117 @@
+import os
+
+weights = { '0': os.path.join('yolov5','weights','yolov5x6.pt'),
+            '1': os.path.join('yolov5','weights','yolov5x6.onnx'),
+            '2': os.path.join('yolov5','weights','yolov5s.pt'),
+            '3': os.path.join('yolov5','weights','yolov5s.onnx'),
+            '4': os.path.join('yolov5','weights','yolov5n.pt'),
+            '5': os.path.join('yolov5','weights','yolov5n.onnx'),
+            '6': os.path.join('yolov5','weights','yolov5m.pt'),
+            '7': os.path.join('yolov5','weights','yolov5m.onnx'),
+            '8': os.path.join('yolov5','weights','yolov5l.pt'),
+            '9': os.path.join('yolov5','weights','yolov5l.onnx'),
+            '10': os.path.join('yolov5','weights','yolov5x.pt'),
+            '11': os.path.join('yolov5','weights','yolov5x.onnx'),
+            '12': os.path.join('yolov5','weights','yolov5n6.pt'),
+            '13': os.path.join('yolov5','weights','yolov5n6.onnx'),
+            '14': os.path.join('yolov5','weights','yolov5s6.pt'),
+            '15': os.path.join('yolov5','weights','yolov5s6.onnx'),
+            '16': os.path.join('yolov5','weights','yolov5m6.pt'),
+            '17': os.path.join('yolov5','weights','yolov5m6.onnx'),
+            '18': os.path.join('yolov5','weights','yolov5l6.pt'),
+            '19': os.path.join('yolov5','weights','yolov5l6.onnx'),
+            # YOLOv6
+            '20': os.path.join('yolov6','weights','yolov6n.pt'),
+            '21': os.path.join('yolov6','weights','yolov6n.onnx'),
+            '22': os.path.join('yolov6','weights','yolov6t.pt'),
+            '23': os.path.join('yolov6','weights','yolov6t.onnx'),
+            '24': os.path.join('yolov6','weights','yolov6s.pt'),
+            '25': os.path.join('yolov6','weights','yolov6s.onnx'),
+            '26': os.path.join('yolov6','weights','yolov6m.pt'),
+            '27': os.path.join('yolov6','weights','yolov6m.onnx'),
+            '28': os.path.join('yolov6','weights','yolov6l.pt'),
+            '29': os.path.join('yolov6','weights','yolov6l.onnx'),
+            '30': os.path.join('yolov6','weights','yolov6l_relu.pt'),
+            '31': os.path.join('yolov6','weights','yolov6l_relu.onnx'),
+            '32': os.path.join('yolov6','weights','yolov6s_repopt.pt'),
+            '33': os.path.join('yolov6','weights','yolov6s_repopt.onnx'),
+            # YOLOv7
+            '34': os.path.join('yolov7','weights','yolov7-tiny.pt'),
+            '35': os.path.join('yolov7','weights','yolov7-tiny.onnx'),
+            '36': os.path.join('yolov7','weights','yolov7.pt'),
+            '37': os.path.join('yolov7','weights','yolov7.onnx'),
+            '38': os.path.join('yolov7','weights','yolov7x.pt'),
+            '39': os.path.join('yolov7','weights','yolov7x.onnx'),
+            '40': os.path.join('yolov7','weights','yolov7-w6.pt'),
+            '41': os.path.join('yolov7','weights','yolov7-w6.onnx'),
+            '42': os.path.join('yolov7','weights','yolov7-e6.pt'),
+            '43': os.path.join('yolov7','weights','yolov7-e6.onnx'),
+            '44': os.path.join('yolov7','weights','yolov7-d6.pt'),
+            '45': os.path.join('yolov7','weights','yolov7-d6.onnx'),
+            '46': os.path.join('yolov7','weights','yolov7-e6e.pt'),
+            '47': os.path.join('yolov7','weights','yolov7-e6e.onnx'),
+            # YOLOR
+            '48': os.path.join('yolor','weights','yolor_csp_x.pt'),
+            '49': os.path.join('yolor','weights','yolor_csp_x.onnx'),
+            '50': os.path.join('yolor','weights','yolor_csp_x_star.pt'),
+            '51': os.path.join('yolor','weights','yolor_csp_x_star.onnx'),
+            '52': os.path.join('yolor','weights','yolor_csp_star.pt'),
+            '53': os.path.join('yolor','weights','yolor_csp_star.onnx'),
+            '54': os.path.join('yolor','weights','yolor_csp.pt'),
+            '55': os.path.join('yolor','weights','yolor_csp.onnx'),
+            '56': os.path.join('yolor','weights','yolor_p6.pt'),
+            '57': os.path.join('yolor','weights','yolor_p6.onnx'),
+            # YOLOX
+            '58': os.path.join('yolox','weights','yolox_l.pth'),
+            '59': os.path.join('yolox','weights','yolox_l.onnx'),
+            '60': os.path.join('yolox','weights','yolox_nano.pth'),
+            '61': os.path.join('yolox','weights','yolox_nano.onnx'),
+            '62': os.path.join('yolox','weights','yolox_tiny.pth'),
+            '63': os.path.join('yolox','weights','yolox_tiny.onnx'),
+            '64': os.path.join('yolox','weights','yolox_darknet.pth'),
+            '65': os.path.join('yolox','weights','yolox_darknet.onnx'),
+            '66': os.path.join('yolox','weights','yolox_s.pth'),
+            '67': os.path.join('yolox','weights','yolox_s.onnx'),
+            '68': os.path.join('yolox','weights','yolox_m.pth'),
+            '69': os.path.join('yolox','weights','yolox_m.onnx'),
+            '70': os.path.join('yolox','weights','yolox_x.pth'),
+            '71': os.path.join('yolox','weights','yolox_x.onnx'),
+            # YOLOv8
+            '72': os.path.join('yolov8','weights','yolov8n.pt'),
+            '73': os.path.join('yolov8','weights','yolov8n.onnx'),
+            '74': os.path.join('yolov8','weights','yolov8s.pt'),
+            '75': os.path.join('yolov8','weights','yolov8s.onnx'),
+            '76': os.path.join('yolov8','weights','yolov8m.pt'),
+            '77': os.path.join('yolov8','weights','yolov8m.onnx'),
+            '78': os.path.join('yolov8','weights','yolov8l.pt'),
+            '79': os.path.join('yolov8','weights','yolov8l.onnx'),
+            '80': os.path.join('yolov8','weights','yolov8x.pt'),
+            '81': os.path.join('yolov8','weights','yolov8x.onnx')
+
+
+
+            }
+
+def get_weight_path(model_flag):
+    if model_flag in range(0, 20):
+        onnx = False if (model_flag % 2 == 0) else True
+        weight = weights[str(model_flag)]
+    elif model_flag in range(20, 34):
+        onnx = False if (model_flag % 2 == 0) else True
+        weight = weights[str(model_flag)]
+    elif model_flag in range(34, 48):
+        onnx = False if (model_flag % 2 == 0) else True
+        weight = weights[str(model_flag)]
+    elif model_flag in range(48, 58):
+        onnx = False if (model_flag % 2 == 0) else True
+        weight = weights[str(model_flag)]
+    elif model_flag in range(58, 72):
+        onnx = False if (model_flag % 2 == 0) else True
+        weight = weights[str(model_flag)]
+    elif model_flag in range(72, 82):
+        onnx = False if (model_flag % 2 == 0) else True
+        weight = weights[str(model_flag)]
+
+
+    return onnx, weight
+
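Editor's note: every branch of get_weight_path computes exactly the same two values, so the function reduces to the parity check plus the table lookup. An equivalent sketch (not part of the commit):

def get_weight_path(model_flag):
    onnx = model_flag % 2 != 0            # odd flags map to ONNX exports
    return onnx, weights[str(model_flag)]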
asone/detectors/yolor/__init__.py
ADDED
@@ -0,0 +1,2 @@
+from .yolor_detector import YOLOrDetector
+__all__ = ['YOLOrDetector']
asone/detectors/yolor/cfg/yolor_csp.cfg
ADDED
@@ -0,0 +1,1376 @@
+[net]
+# Testing
+#batch=1
+#subdivisions=1
+# Training
+batch=64
+subdivisions=8
+width=512
+height=512
+channels=3
+momentum=0.949
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.00261
+burn_in=1000
+max_batches = 500500
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+#cutmix=1
+mosaic=1
+
+
+# ============ Backbone ============ #
+
+# Stem
+
+# 0
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=silu
(The remaining roughly 1,330 lines of this file are the standard YOLOR-CSP backbone and detection-head definitions — repeated [convolutional], [shortcut], and [route] blocks for stages P1–P5 — and this limited diff view cuts off partway through the file. See the raw diff for the complete configuration.)
+
activation=silu
|
710 |
+
|
711 |
+
[shortcut]
|
712 |
+
from=-3
|
713 |
+
activation=linear
|
714 |
+
|
715 |
+
[convolutional]
|
716 |
+
batch_normalize=1
|
717 |
+
filters=512
|
718 |
+
size=1
|
719 |
+
stride=1
|
720 |
+
pad=1
|
721 |
+
activation=silu
|
722 |
+
|
723 |
+
[convolutional]
|
724 |
+
batch_normalize=1
|
725 |
+
filters=512
|
726 |
+
size=3
|
727 |
+
stride=1
|
728 |
+
pad=1
|
729 |
+
activation=silu
|
730 |
+
|
731 |
+
[shortcut]
|
732 |
+
from=-3
|
733 |
+
activation=linear
|
734 |
+
|
735 |
+
# Transition first
|
736 |
+
|
737 |
+
[convolutional]
|
738 |
+
batch_normalize=1
|
739 |
+
filters=512
|
740 |
+
size=1
|
741 |
+
stride=1
|
742 |
+
pad=1
|
743 |
+
activation=silu
|
744 |
+
|
745 |
+
# Merge [-1 -(3k+4)]
|
746 |
+
|
747 |
+
[route]
|
748 |
+
layers = -1,-16
|
749 |
+
|
750 |
+
# Transition last
|
751 |
+
|
752 |
+
# 98 (previous+7+3k)
|
753 |
+
[convolutional]
|
754 |
+
batch_normalize=1
|
755 |
+
filters=1024
|
756 |
+
size=1
|
757 |
+
stride=1
|
758 |
+
pad=1
|
759 |
+
activation=silu
|
760 |
+
|
761 |
+
# ============ End of Backbone ============ #
|
762 |
+
|
763 |
+
# ============ Neck ============ #
|
764 |
+
|
765 |
+
# CSPSPP
|
766 |
+
|
767 |
+
[convolutional]
|
768 |
+
batch_normalize=1
|
769 |
+
filters=512
|
770 |
+
size=1
|
771 |
+
stride=1
|
772 |
+
pad=1
|
773 |
+
activation=silu
|
774 |
+
|
775 |
+
[route]
|
776 |
+
layers = -2
|
777 |
+
|
778 |
+
[convolutional]
|
779 |
+
batch_normalize=1
|
780 |
+
filters=512
|
781 |
+
size=1
|
782 |
+
stride=1
|
783 |
+
pad=1
|
784 |
+
activation=silu
|
785 |
+
|
786 |
+
[convolutional]
|
787 |
+
batch_normalize=1
|
788 |
+
size=3
|
789 |
+
stride=1
|
790 |
+
pad=1
|
791 |
+
filters=512
|
792 |
+
activation=silu
|
793 |
+
|
794 |
+
[convolutional]
|
795 |
+
batch_normalize=1
|
796 |
+
filters=512
|
797 |
+
size=1
|
798 |
+
stride=1
|
799 |
+
pad=1
|
800 |
+
activation=silu
|
801 |
+
|
802 |
+
### SPP ###
|
803 |
+
[maxpool]
|
804 |
+
stride=1
|
805 |
+
size=5
|
806 |
+
|
807 |
+
[route]
|
808 |
+
layers=-2
|
809 |
+
|
810 |
+
[maxpool]
|
811 |
+
stride=1
|
812 |
+
size=9
|
813 |
+
|
814 |
+
[route]
|
815 |
+
layers=-4
|
816 |
+
|
817 |
+
[maxpool]
|
818 |
+
stride=1
|
819 |
+
size=13
|
820 |
+
|
821 |
+
[route]
|
822 |
+
layers=-1,-3,-5,-6
|
823 |
+
### End SPP ###
|
824 |
+
|
825 |
+
[convolutional]
|
826 |
+
batch_normalize=1
|
827 |
+
filters=512
|
828 |
+
size=1
|
829 |
+
stride=1
|
830 |
+
pad=1
|
831 |
+
activation=silu
|
832 |
+
|
833 |
+
[convolutional]
|
834 |
+
batch_normalize=1
|
835 |
+
size=3
|
836 |
+
stride=1
|
837 |
+
pad=1
|
838 |
+
filters=512
|
839 |
+
activation=silu
|
840 |
+
|
841 |
+
[route]
|
842 |
+
layers = -1, -13
|
843 |
+
|
844 |
+
# 113 (previous+6+5+2k)
|
845 |
+
[convolutional]
|
846 |
+
batch_normalize=1
|
847 |
+
filters=512
|
848 |
+
size=1
|
849 |
+
stride=1
|
850 |
+
pad=1
|
851 |
+
activation=silu
|
852 |
+
|
853 |
+
# End of CSPSPP
|
854 |
+
|
855 |
+
|
856 |
+
# FPN-4
|
857 |
+
|
858 |
+
[convolutional]
|
859 |
+
batch_normalize=1
|
860 |
+
filters=256
|
861 |
+
size=1
|
862 |
+
stride=1
|
863 |
+
pad=1
|
864 |
+
activation=silu
|
865 |
+
|
866 |
+
[upsample]
|
867 |
+
stride=2
|
868 |
+
|
869 |
+
[route]
|
870 |
+
layers = 79
|
871 |
+
|
872 |
+
[convolutional]
|
873 |
+
batch_normalize=1
|
874 |
+
filters=256
|
875 |
+
size=1
|
876 |
+
stride=1
|
877 |
+
pad=1
|
878 |
+
activation=silu
|
879 |
+
|
880 |
+
[route]
|
881 |
+
layers = -1, -3
|
882 |
+
|
883 |
+
[convolutional]
|
884 |
+
batch_normalize=1
|
885 |
+
filters=256
|
886 |
+
size=1
|
887 |
+
stride=1
|
888 |
+
pad=1
|
889 |
+
activation=silu
|
890 |
+
|
891 |
+
# Split
|
892 |
+
|
893 |
+
[convolutional]
|
894 |
+
batch_normalize=1
|
895 |
+
filters=256
|
896 |
+
size=1
|
897 |
+
stride=1
|
898 |
+
pad=1
|
899 |
+
activation=silu
|
900 |
+
|
901 |
+
[route]
|
902 |
+
layers = -2
|
903 |
+
|
904 |
+
# Plain Block
|
905 |
+
|
906 |
+
[convolutional]
|
907 |
+
batch_normalize=1
|
908 |
+
filters=256
|
909 |
+
size=1
|
910 |
+
stride=1
|
911 |
+
pad=1
|
912 |
+
activation=silu
|
913 |
+
|
914 |
+
[convolutional]
|
915 |
+
batch_normalize=1
|
916 |
+
size=3
|
917 |
+
stride=1
|
918 |
+
pad=1
|
919 |
+
filters=256
|
920 |
+
activation=silu
|
921 |
+
|
922 |
+
[convolutional]
|
923 |
+
batch_normalize=1
|
924 |
+
filters=256
|
925 |
+
size=1
|
926 |
+
stride=1
|
927 |
+
pad=1
|
928 |
+
activation=silu
|
929 |
+
|
930 |
+
[convolutional]
|
931 |
+
batch_normalize=1
|
932 |
+
size=3
|
933 |
+
stride=1
|
934 |
+
pad=1
|
935 |
+
filters=256
|
936 |
+
activation=silu
|
937 |
+
|
938 |
+
# Merge [-1, -(2k+2)]
|
939 |
+
|
940 |
+
[route]
|
941 |
+
layers = -1, -6
|
942 |
+
|
943 |
+
# Transition last
|
944 |
+
|
945 |
+
# 127 (previous+6+4+2k)
|
946 |
+
[convolutional]
|
947 |
+
batch_normalize=1
|
948 |
+
filters=256
|
949 |
+
size=1
|
950 |
+
stride=1
|
951 |
+
pad=1
|
952 |
+
activation=silu
|
953 |
+
|
954 |
+
|
955 |
+
# FPN-3
|
956 |
+
|
957 |
+
[convolutional]
|
958 |
+
batch_normalize=1
|
959 |
+
filters=128
|
960 |
+
size=1
|
961 |
+
stride=1
|
962 |
+
pad=1
|
963 |
+
activation=silu
|
964 |
+
|
965 |
+
[upsample]
|
966 |
+
stride=2
|
967 |
+
|
968 |
+
[route]
|
969 |
+
layers = 48
|
970 |
+
|
971 |
+
[convolutional]
|
972 |
+
batch_normalize=1
|
973 |
+
filters=128
|
974 |
+
size=1
|
975 |
+
stride=1
|
976 |
+
pad=1
|
977 |
+
activation=silu
|
978 |
+
|
979 |
+
[route]
|
980 |
+
layers = -1, -3
|
981 |
+
|
982 |
+
[convolutional]
|
983 |
+
batch_normalize=1
|
984 |
+
filters=128
|
985 |
+
size=1
|
986 |
+
stride=1
|
987 |
+
pad=1
|
988 |
+
activation=silu
|
989 |
+
|
990 |
+
# Split
|
991 |
+
|
992 |
+
[convolutional]
|
993 |
+
batch_normalize=1
|
994 |
+
filters=128
|
995 |
+
size=1
|
996 |
+
stride=1
|
997 |
+
pad=1
|
998 |
+
activation=silu
|
999 |
+
|
1000 |
+
[route]
|
1001 |
+
layers = -2
|
1002 |
+
|
1003 |
+
# Plain Block
|
1004 |
+
|
1005 |
+
[convolutional]
|
1006 |
+
batch_normalize=1
|
1007 |
+
filters=128
|
1008 |
+
size=1
|
1009 |
+
stride=1
|
1010 |
+
pad=1
|
1011 |
+
activation=silu
|
1012 |
+
|
1013 |
+
[convolutional]
|
1014 |
+
batch_normalize=1
|
1015 |
+
size=3
|
1016 |
+
stride=1
|
1017 |
+
pad=1
|
1018 |
+
filters=128
|
1019 |
+
activation=silu
|
1020 |
+
|
1021 |
+
[convolutional]
|
1022 |
+
batch_normalize=1
|
1023 |
+
filters=128
|
1024 |
+
size=1
|
1025 |
+
stride=1
|
1026 |
+
pad=1
|
1027 |
+
activation=silu
|
1028 |
+
|
1029 |
+
[convolutional]
|
1030 |
+
batch_normalize=1
|
1031 |
+
size=3
|
1032 |
+
stride=1
|
1033 |
+
pad=1
|
1034 |
+
filters=128
|
1035 |
+
activation=silu
|
1036 |
+
|
1037 |
+
# Merge [-1, -(2k+2)]
|
1038 |
+
|
1039 |
+
[route]
|
1040 |
+
layers = -1, -6
|
1041 |
+
|
1042 |
+
# Transition last
|
1043 |
+
|
1044 |
+
# 141 (previous+6+4+2k)
|
1045 |
+
[convolutional]
|
1046 |
+
batch_normalize=1
|
1047 |
+
filters=128
|
1048 |
+
size=1
|
1049 |
+
stride=1
|
1050 |
+
pad=1
|
1051 |
+
activation=silu
|
1052 |
+
|
1053 |
+
|
1054 |
+
# PAN-4
|
1055 |
+
|
1056 |
+
[convolutional]
|
1057 |
+
batch_normalize=1
|
1058 |
+
size=3
|
1059 |
+
stride=2
|
1060 |
+
pad=1
|
1061 |
+
filters=256
|
1062 |
+
activation=silu
|
1063 |
+
|
1064 |
+
[route]
|
1065 |
+
layers = -1, 127
|
1066 |
+
|
1067 |
+
[convolutional]
|
1068 |
+
batch_normalize=1
|
1069 |
+
filters=256
|
1070 |
+
size=1
|
1071 |
+
stride=1
|
1072 |
+
pad=1
|
1073 |
+
activation=silu
|
1074 |
+
|
1075 |
+
# Split
|
1076 |
+
|
1077 |
+
[convolutional]
|
1078 |
+
batch_normalize=1
|
1079 |
+
filters=256
|
1080 |
+
size=1
|
1081 |
+
stride=1
|
1082 |
+
pad=1
|
1083 |
+
activation=silu
|
1084 |
+
|
1085 |
+
[route]
|
1086 |
+
layers = -2
|
1087 |
+
|
1088 |
+
# Plain Block
|
1089 |
+
|
1090 |
+
[convolutional]
|
1091 |
+
batch_normalize=1
|
1092 |
+
filters=256
|
1093 |
+
size=1
|
1094 |
+
stride=1
|
1095 |
+
pad=1
|
1096 |
+
activation=silu
|
1097 |
+
|
1098 |
+
[convolutional]
|
1099 |
+
batch_normalize=1
|
1100 |
+
size=3
|
1101 |
+
stride=1
|
1102 |
+
pad=1
|
1103 |
+
filters=256
|
1104 |
+
activation=silu
|
1105 |
+
|
1106 |
+
[convolutional]
|
1107 |
+
batch_normalize=1
|
1108 |
+
filters=256
|
1109 |
+
size=1
|
1110 |
+
stride=1
|
1111 |
+
pad=1
|
1112 |
+
activation=silu
|
1113 |
+
|
1114 |
+
[convolutional]
|
1115 |
+
batch_normalize=1
|
1116 |
+
size=3
|
1117 |
+
stride=1
|
1118 |
+
pad=1
|
1119 |
+
filters=256
|
1120 |
+
activation=silu
|
1121 |
+
|
1122 |
+
[route]
|
1123 |
+
layers = -1,-6
|
1124 |
+
|
1125 |
+
# Transition last
|
1126 |
+
|
1127 |
+
# 152 (previous+3+4+2k)
|
1128 |
+
[convolutional]
|
1129 |
+
batch_normalize=1
|
1130 |
+
filters=256
|
1131 |
+
size=1
|
1132 |
+
stride=1
|
1133 |
+
pad=1
|
1134 |
+
activation=silu
|
1135 |
+
|
1136 |
+
|
1137 |
+
# PAN-5
|
1138 |
+
|
1139 |
+
[convolutional]
|
1140 |
+
batch_normalize=1
|
1141 |
+
size=3
|
1142 |
+
stride=2
|
1143 |
+
pad=1
|
1144 |
+
filters=512
|
1145 |
+
activation=silu
|
1146 |
+
|
1147 |
+
[route]
|
1148 |
+
layers = -1, 113
|
1149 |
+
|
1150 |
+
[convolutional]
|
1151 |
+
batch_normalize=1
|
1152 |
+
filters=512
|
1153 |
+
size=1
|
1154 |
+
stride=1
|
1155 |
+
pad=1
|
1156 |
+
activation=silu
|
1157 |
+
|
1158 |
+
# Split
|
1159 |
+
|
1160 |
+
[convolutional]
|
1161 |
+
batch_normalize=1
|
1162 |
+
filters=512
|
1163 |
+
size=1
|
1164 |
+
stride=1
|
1165 |
+
pad=1
|
1166 |
+
activation=silu
|
1167 |
+
|
1168 |
+
[route]
|
1169 |
+
layers = -2
|
1170 |
+
|
1171 |
+
# Plain Block
|
1172 |
+
|
1173 |
+
[convolutional]
|
1174 |
+
batch_normalize=1
|
1175 |
+
filters=512
|
1176 |
+
size=1
|
1177 |
+
stride=1
|
1178 |
+
pad=1
|
1179 |
+
activation=silu
|
1180 |
+
|
1181 |
+
[convolutional]
|
1182 |
+
batch_normalize=1
|
1183 |
+
size=3
|
1184 |
+
stride=1
|
1185 |
+
pad=1
|
1186 |
+
filters=512
|
1187 |
+
activation=silu
|
1188 |
+
|
1189 |
+
[convolutional]
|
1190 |
+
batch_normalize=1
|
1191 |
+
filters=512
|
1192 |
+
size=1
|
1193 |
+
stride=1
|
1194 |
+
pad=1
|
1195 |
+
activation=silu
|
1196 |
+
|
1197 |
+
[convolutional]
|
1198 |
+
batch_normalize=1
|
1199 |
+
size=3
|
1200 |
+
stride=1
|
1201 |
+
pad=1
|
1202 |
+
filters=512
|
1203 |
+
activation=silu
|
1204 |
+
|
1205 |
+
[route]
|
1206 |
+
layers = -1,-6
|
1207 |
+
|
1208 |
+
# Transition last
|
1209 |
+
|
1210 |
+
# 163 (previous+3+4+2k)
|
1211 |
+
[convolutional]
|
1212 |
+
batch_normalize=1
|
1213 |
+
filters=512
|
1214 |
+
size=1
|
1215 |
+
stride=1
|
1216 |
+
pad=1
|
1217 |
+
activation=silu
|
1218 |
+
|
1219 |
+
# ============ End of Neck ============ #
|
1220 |
+
|
1221 |
+
# 164
|
1222 |
+
[implicit_add]
|
1223 |
+
filters=256
|
1224 |
+
|
1225 |
+
# 165
|
1226 |
+
[implicit_add]
|
1227 |
+
filters=512
|
1228 |
+
|
1229 |
+
# 166
|
1230 |
+
[implicit_add]
|
1231 |
+
filters=1024
|
1232 |
+
|
1233 |
+
# 167
|
1234 |
+
[implicit_mul]
|
1235 |
+
filters=255
|
1236 |
+
|
1237 |
+
# 168
|
1238 |
+
[implicit_mul]
|
1239 |
+
filters=255
|
1240 |
+
|
1241 |
+
# 169
|
1242 |
+
[implicit_mul]
|
1243 |
+
filters=255
|
1244 |
+
|
1245 |
+
# ============ Head ============ #
|
1246 |
+
|
1247 |
+
# YOLO-3
|
1248 |
+
|
1249 |
+
[route]
|
1250 |
+
layers = 141
|
1251 |
+
|
1252 |
+
[convolutional]
|
1253 |
+
batch_normalize=1
|
1254 |
+
size=3
|
1255 |
+
stride=1
|
1256 |
+
pad=1
|
1257 |
+
filters=256
|
1258 |
+
activation=silu
|
1259 |
+
|
1260 |
+
[shift_channels]
|
1261 |
+
from=164
|
1262 |
+
|
1263 |
+
[convolutional]
|
1264 |
+
size=1
|
1265 |
+
stride=1
|
1266 |
+
pad=1
|
1267 |
+
filters=255
|
1268 |
+
activation=linear
|
1269 |
+
|
1270 |
+
[control_channels]
|
1271 |
+
from=167
|
1272 |
+
|
1273 |
+
[yolo]
|
1274 |
+
mask = 0,1,2
|
1275 |
+
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
|
1276 |
+
classes=80
|
1277 |
+
num=9
|
1278 |
+
jitter=.3
|
1279 |
+
ignore_thresh = .7
|
1280 |
+
truth_thresh = 1
|
1281 |
+
random=1
|
1282 |
+
scale_x_y = 1.05
|
1283 |
+
iou_thresh=0.213
|
1284 |
+
cls_normalizer=1.0
|
1285 |
+
iou_normalizer=0.07
|
1286 |
+
iou_loss=ciou
|
1287 |
+
nms_kind=greedynms
|
1288 |
+
beta_nms=0.6
|
1289 |
+
|
1290 |
+
|
1291 |
+
# YOLO-4
|
1292 |
+
|
1293 |
+
[route]
|
1294 |
+
layers = 152
|
1295 |
+
|
1296 |
+
[convolutional]
|
1297 |
+
batch_normalize=1
|
1298 |
+
size=3
|
1299 |
+
stride=1
|
1300 |
+
pad=1
|
1301 |
+
filters=512
|
1302 |
+
activation=silu
|
1303 |
+
|
1304 |
+
[shift_channels]
|
1305 |
+
from=165
|
1306 |
+
|
1307 |
+
[convolutional]
|
1308 |
+
size=1
|
1309 |
+
stride=1
|
1310 |
+
pad=1
|
1311 |
+
filters=255
|
1312 |
+
activation=linear
|
1313 |
+
|
1314 |
+
[control_channels]
|
1315 |
+
from=168
|
1316 |
+
|
1317 |
+
[yolo]
|
1318 |
+
mask = 3,4,5
|
1319 |
+
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
|
1320 |
+
classes=80
|
1321 |
+
num=9
|
1322 |
+
jitter=.3
|
1323 |
+
ignore_thresh = .7
|
1324 |
+
truth_thresh = 1
|
1325 |
+
random=1
|
1326 |
+
scale_x_y = 1.05
|
1327 |
+
iou_thresh=0.213
|
1328 |
+
cls_normalizer=1.0
|
1329 |
+
iou_normalizer=0.07
|
1330 |
+
iou_loss=ciou
|
1331 |
+
nms_kind=greedynms
|
1332 |
+
beta_nms=0.6
|
1333 |
+
|
1334 |
+
|
1335 |
+
# YOLO-5
|
1336 |
+
|
1337 |
+
[route]
|
1338 |
+
layers = 163
|
1339 |
+
|
1340 |
+
[convolutional]
|
1341 |
+
batch_normalize=1
|
1342 |
+
size=3
|
1343 |
+
stride=1
|
1344 |
+
pad=1
|
1345 |
+
filters=1024
|
1346 |
+
activation=silu
|
1347 |
+
|
1348 |
+
[shift_channels]
|
1349 |
+
from=166
|
1350 |
+
|
1351 |
+
[convolutional]
|
1352 |
+
size=1
|
1353 |
+
stride=1
|
1354 |
+
pad=1
|
1355 |
+
filters=255
|
1356 |
+
activation=linear
|
1357 |
+
|
1358 |
+
[control_channels]
|
1359 |
+
from=169
|
1360 |
+
|
1361 |
+
[yolo]
|
1362 |
+
mask = 6,7,8
|
1363 |
+
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
|
1364 |
+
classes=80
|
1365 |
+
num=9
|
1366 |
+
jitter=.3
|
1367 |
+
ignore_thresh = .7
|
1368 |
+
truth_thresh = 1
|
1369 |
+
random=1
|
1370 |
+
scale_x_y = 1.05
|
1371 |
+
iou_thresh=0.213
|
1372 |
+
cls_normalizer=1.0
|
1373 |
+
iou_normalizer=0.07
|
1374 |
+
iou_loss=ciou
|
1375 |
+
nms_kind=greedynms
|
1376 |
+
beta_nms=0.6
|
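Note: this cfg (and the two that follow) is a darknet-style config, read as a sequence of [section] headers with key=value pairs. As a rough illustration only, a minimal parser for such a file might look like the sketch below; the function name, the __main__ path, and the printout are assumptions for illustration, not something defined by this commit.

from pathlib import Path

def parse_darknet_cfg(path):
    """Parse a darknet-style .cfg into a list of per-section dicts."""
    sections = []
    for raw in Path(path).read_text().splitlines():
        line = raw.strip()
        if not line or line.startswith('#'):
            continue                                   # skip blank lines and comments
        if line.startswith('[') and line.endswith(']'):
            sections.append({'type': line[1:-1]})      # start a new [section]
        else:
            key, _, value = line.partition('=')
            sections[-1][key.strip()] = value.strip()  # key=value inside current section
    return sections

if __name__ == '__main__':
    # Assumed path; adjust to wherever the cfg lives in your checkout.
    cfg = parse_darknet_cfg('asone/detectors/yolor/cfg/yolor_csp.cfg')
    print(cfg[0]['type'], 'plus', len(cfg) - 1, 'layer sections')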
asone/detectors/yolor/cfg/yolor_csp_x.cfg
ADDED
@@ -0,0 +1,1576 @@
1 |
+
[net]
|
2 |
+
# Testing
|
3 |
+
#batch=1
|
4 |
+
#subdivisions=1
|
5 |
+
# Training
|
6 |
+
batch=64
|
7 |
+
subdivisions=8
|
8 |
+
width=512
|
9 |
+
height=512
|
10 |
+
channels=3
|
11 |
+
momentum=0.949
|
12 |
+
decay=0.0005
|
13 |
+
angle=0
|
14 |
+
saturation = 1.5
|
15 |
+
exposure = 1.5
|
16 |
+
hue=.1
|
17 |
+
|
18 |
+
learning_rate=0.00261
|
19 |
+
burn_in=1000
|
20 |
+
max_batches = 500500
|
21 |
+
policy=steps
|
22 |
+
steps=400000,450000
|
23 |
+
scales=.1,.1
|
24 |
+
|
25 |
+
#cutmix=1
|
26 |
+
mosaic=1
|
27 |
+
|
28 |
+
|
29 |
+
# ============ Backbone ============ #
|
30 |
+
|
31 |
+
# Stem
|
32 |
+
|
33 |
+
# 0
|
34 |
+
[convolutional]
|
35 |
+
batch_normalize=1
|
36 |
+
filters=32
|
37 |
+
size=3
|
38 |
+
stride=1
|
39 |
+
pad=1
|
40 |
+
activation=silu
|
41 |
+
|
42 |
+
# P1
|
43 |
+
|
44 |
+
# Downsample
|
45 |
+
|
46 |
+
[convolutional]
|
47 |
+
batch_normalize=1
|
48 |
+
filters=80
|
49 |
+
size=3
|
50 |
+
stride=2
|
51 |
+
pad=1
|
52 |
+
activation=silu
|
53 |
+
|
54 |
+
# Residual Block
|
55 |
+
|
56 |
+
[convolutional]
|
57 |
+
batch_normalize=1
|
58 |
+
filters=40
|
59 |
+
size=1
|
60 |
+
stride=1
|
61 |
+
pad=1
|
62 |
+
activation=silu
|
63 |
+
|
64 |
+
[convolutional]
|
65 |
+
batch_normalize=1
|
66 |
+
filters=80
|
67 |
+
size=3
|
68 |
+
stride=1
|
69 |
+
pad=1
|
70 |
+
activation=silu
|
71 |
+
|
72 |
+
# 4 (previous+1+3k)
|
73 |
+
[shortcut]
|
74 |
+
from=-3
|
75 |
+
activation=linear
|
76 |
+
|
77 |
+
# P2
|
78 |
+
|
79 |
+
# Downsample
|
80 |
+
|
81 |
+
[convolutional]
|
82 |
+
batch_normalize=1
|
83 |
+
filters=160
|
84 |
+
size=3
|
85 |
+
stride=2
|
86 |
+
pad=1
|
87 |
+
activation=silu
|
88 |
+
|
89 |
+
# Split
|
90 |
+
|
91 |
+
[convolutional]
|
92 |
+
batch_normalize=1
|
93 |
+
filters=80
|
94 |
+
size=1
|
95 |
+
stride=1
|
96 |
+
pad=1
|
97 |
+
activation=silu
|
98 |
+
|
99 |
+
[route]
|
100 |
+
layers = -2
|
101 |
+
|
102 |
+
[convolutional]
|
103 |
+
batch_normalize=1
|
104 |
+
filters=80
|
105 |
+
size=1
|
106 |
+
stride=1
|
107 |
+
pad=1
|
108 |
+
activation=silu
|
109 |
+
|
110 |
+
# Residual Block
|
111 |
+
|
112 |
+
[convolutional]
|
113 |
+
batch_normalize=1
|
114 |
+
filters=80
|
115 |
+
size=1
|
116 |
+
stride=1
|
117 |
+
pad=1
|
118 |
+
activation=silu
|
119 |
+
|
120 |
+
[convolutional]
|
121 |
+
batch_normalize=1
|
122 |
+
filters=80
|
123 |
+
size=3
|
124 |
+
stride=1
|
125 |
+
pad=1
|
126 |
+
activation=silu
|
127 |
+
|
128 |
+
[shortcut]
|
129 |
+
from=-3
|
130 |
+
activation=linear
|
131 |
+
|
132 |
+
[convolutional]
|
133 |
+
batch_normalize=1
|
134 |
+
filters=80
|
135 |
+
size=1
|
136 |
+
stride=1
|
137 |
+
pad=1
|
138 |
+
activation=silu
|
139 |
+
|
140 |
+
[convolutional]
|
141 |
+
batch_normalize=1
|
142 |
+
filters=80
|
143 |
+
size=3
|
144 |
+
stride=1
|
145 |
+
pad=1
|
146 |
+
activation=silu
|
147 |
+
|
148 |
+
[shortcut]
|
149 |
+
from=-3
|
150 |
+
activation=linear
|
151 |
+
|
152 |
+
[convolutional]
|
153 |
+
batch_normalize=1
|
154 |
+
filters=80
|
155 |
+
size=1
|
156 |
+
stride=1
|
157 |
+
pad=1
|
158 |
+
activation=silu
|
159 |
+
|
160 |
+
[convolutional]
|
161 |
+
batch_normalize=1
|
162 |
+
filters=80
|
163 |
+
size=3
|
164 |
+
stride=1
|
165 |
+
pad=1
|
166 |
+
activation=silu
|
167 |
+
|
168 |
+
[shortcut]
|
169 |
+
from=-3
|
170 |
+
activation=linear
|
171 |
+
|
172 |
+
# Transition first
|
173 |
+
|
174 |
+
[convolutional]
|
175 |
+
batch_normalize=1
|
176 |
+
filters=80
|
177 |
+
size=1
|
178 |
+
stride=1
|
179 |
+
pad=1
|
180 |
+
activation=silu
|
181 |
+
|
182 |
+
# Merge [-1, -(3k+4)]
|
183 |
+
|
184 |
+
[route]
|
185 |
+
layers = -1,-13
|
186 |
+
|
187 |
+
# Transition last
|
188 |
+
|
189 |
+
# 20 (previous+7+3k)
|
190 |
+
[convolutional]
|
191 |
+
batch_normalize=1
|
192 |
+
filters=160
|
193 |
+
size=1
|
194 |
+
stride=1
|
195 |
+
pad=1
|
196 |
+
activation=silu
|
197 |
+
|
198 |
+
# P3
|
199 |
+
|
200 |
+
# Downsample
|
201 |
+
|
202 |
+
[convolutional]
|
203 |
+
batch_normalize=1
|
204 |
+
filters=320
|
205 |
+
size=3
|
206 |
+
stride=2
|
207 |
+
pad=1
|
208 |
+
activation=silu
|
209 |
+
|
210 |
+
# Split
|
211 |
+
|
212 |
+
[convolutional]
|
213 |
+
batch_normalize=1
|
214 |
+
filters=160
|
215 |
+
size=1
|
216 |
+
stride=1
|
217 |
+
pad=1
|
218 |
+
activation=silu
|
219 |
+
|
220 |
+
[route]
|
221 |
+
layers = -2
|
222 |
+
|
223 |
+
[convolutional]
|
224 |
+
batch_normalize=1
|
225 |
+
filters=160
|
226 |
+
size=1
|
227 |
+
stride=1
|
228 |
+
pad=1
|
229 |
+
activation=silu
|
230 |
+
|
231 |
+
# Residual Block
|
232 |
+
|
233 |
+
[convolutional]
|
234 |
+
batch_normalize=1
|
235 |
+
filters=160
|
236 |
+
size=1
|
237 |
+
stride=1
|
238 |
+
pad=1
|
239 |
+
activation=silu
|
240 |
+
|
241 |
+
[convolutional]
|
242 |
+
batch_normalize=1
|
243 |
+
filters=160
|
244 |
+
size=3
|
245 |
+
stride=1
|
246 |
+
pad=1
|
247 |
+
activation=silu
|
248 |
+
|
249 |
+
[shortcut]
|
250 |
+
from=-3
|
251 |
+
activation=linear
|
252 |
+
|
253 |
+
[convolutional]
|
254 |
+
batch_normalize=1
|
255 |
+
filters=160
|
256 |
+
size=1
|
257 |
+
stride=1
|
258 |
+
pad=1
|
259 |
+
activation=silu
|
260 |
+
|
261 |
+
[convolutional]
|
262 |
+
batch_normalize=1
|
263 |
+
filters=160
|
264 |
+
size=3
|
265 |
+
stride=1
|
266 |
+
pad=1
|
267 |
+
activation=silu
|
268 |
+
|
269 |
+
[shortcut]
|
270 |
+
from=-3
|
271 |
+
activation=linear
|
272 |
+
|
273 |
+
[convolutional]
|
274 |
+
batch_normalize=1
|
275 |
+
filters=160
|
276 |
+
size=1
|
277 |
+
stride=1
|
278 |
+
pad=1
|
279 |
+
activation=silu
|
280 |
+
|
281 |
+
[convolutional]
|
282 |
+
batch_normalize=1
|
283 |
+
filters=160
|
284 |
+
size=3
|
285 |
+
stride=1
|
286 |
+
pad=1
|
287 |
+
activation=silu
|
288 |
+
|
289 |
+
[shortcut]
|
290 |
+
from=-3
|
291 |
+
activation=linear
|
292 |
+
|
293 |
+
[convolutional]
|
294 |
+
batch_normalize=1
|
295 |
+
filters=160
|
296 |
+
size=1
|
297 |
+
stride=1
|
298 |
+
pad=1
|
299 |
+
activation=silu
|
300 |
+
|
301 |
+
[convolutional]
|
302 |
+
batch_normalize=1
|
303 |
+
filters=160
|
304 |
+
size=3
|
305 |
+
stride=1
|
306 |
+
pad=1
|
307 |
+
activation=silu
|
308 |
+
|
309 |
+
[shortcut]
|
310 |
+
from=-3
|
311 |
+
activation=linear
|
312 |
+
|
313 |
+
[convolutional]
|
314 |
+
batch_normalize=1
|
315 |
+
filters=160
|
316 |
+
size=1
|
317 |
+
stride=1
|
318 |
+
pad=1
|
319 |
+
activation=silu
|
320 |
+
|
321 |
+
[convolutional]
|
322 |
+
batch_normalize=1
|
323 |
+
filters=160
|
324 |
+
size=3
|
325 |
+
stride=1
|
326 |
+
pad=1
|
327 |
+
activation=silu
|
328 |
+
|
329 |
+
[shortcut]
|
330 |
+
from=-3
|
331 |
+
activation=linear
|
332 |
+
|
333 |
+
[convolutional]
|
334 |
+
batch_normalize=1
|
335 |
+
filters=160
|
336 |
+
size=1
|
337 |
+
stride=1
|
338 |
+
pad=1
|
339 |
+
activation=silu
|
340 |
+
|
341 |
+
[convolutional]
|
342 |
+
batch_normalize=1
|
343 |
+
filters=160
|
344 |
+
size=3
|
345 |
+
stride=1
|
346 |
+
pad=1
|
347 |
+
activation=silu
|
348 |
+
|
349 |
+
[shortcut]
|
350 |
+
from=-3
|
351 |
+
activation=linear
|
352 |
+
|
353 |
+
[convolutional]
|
354 |
+
batch_normalize=1
|
355 |
+
filters=160
|
356 |
+
size=1
|
357 |
+
stride=1
|
358 |
+
pad=1
|
359 |
+
activation=silu
|
360 |
+
|
361 |
+
[convolutional]
|
362 |
+
batch_normalize=1
|
363 |
+
filters=160
|
364 |
+
size=3
|
365 |
+
stride=1
|
366 |
+
pad=1
|
367 |
+
activation=silu
|
368 |
+
|
369 |
+
[shortcut]
|
370 |
+
from=-3
|
371 |
+
activation=linear
|
372 |
+
|
373 |
+
[convolutional]
|
374 |
+
batch_normalize=1
|
375 |
+
filters=160
|
376 |
+
size=1
|
377 |
+
stride=1
|
378 |
+
pad=1
|
379 |
+
activation=silu
|
380 |
+
|
381 |
+
[convolutional]
|
382 |
+
batch_normalize=1
|
383 |
+
filters=160
|
384 |
+
size=3
|
385 |
+
stride=1
|
386 |
+
pad=1
|
387 |
+
activation=silu
|
388 |
+
|
389 |
+
[shortcut]
|
390 |
+
from=-3
|
391 |
+
activation=linear
|
392 |
+
|
393 |
+
[convolutional]
|
394 |
+
batch_normalize=1
|
395 |
+
filters=160
|
396 |
+
size=1
|
397 |
+
stride=1
|
398 |
+
pad=1
|
399 |
+
activation=silu
|
400 |
+
|
401 |
+
[convolutional]
|
402 |
+
batch_normalize=1
|
403 |
+
filters=160
|
404 |
+
size=3
|
405 |
+
stride=1
|
406 |
+
pad=1
|
407 |
+
activation=silu
|
408 |
+
|
409 |
+
[shortcut]
|
410 |
+
from=-3
|
411 |
+
activation=linear
|
412 |
+
|
413 |
+
[convolutional]
|
414 |
+
batch_normalize=1
|
415 |
+
filters=160
|
416 |
+
size=1
|
417 |
+
stride=1
|
418 |
+
pad=1
|
419 |
+
activation=silu
|
420 |
+
|
421 |
+
[convolutional]
|
422 |
+
batch_normalize=1
|
423 |
+
filters=160
|
424 |
+
size=3
|
425 |
+
stride=1
|
426 |
+
pad=1
|
427 |
+
activation=silu
|
428 |
+
|
429 |
+
[shortcut]
|
430 |
+
from=-3
|
431 |
+
activation=linear
|
432 |
+
|
433 |
+
# Transition first
|
434 |
+
|
435 |
+
[convolutional]
|
436 |
+
batch_normalize=1
|
437 |
+
filters=160
|
438 |
+
size=1
|
439 |
+
stride=1
|
440 |
+
pad=1
|
441 |
+
activation=silu
|
442 |
+
|
443 |
+
# Merge [-1 -(4+3k)]
|
444 |
+
|
445 |
+
[route]
|
446 |
+
layers = -1,-34
|
447 |
+
|
448 |
+
# Transition last
|
449 |
+
|
450 |
+
# 57 (previous+7+3k)
|
451 |
+
[convolutional]
|
452 |
+
batch_normalize=1
|
453 |
+
filters=320
|
454 |
+
size=1
|
455 |
+
stride=1
|
456 |
+
pad=1
|
457 |
+
activation=silu
|
458 |
+
|
459 |
+
# P4
|
460 |
+
|
461 |
+
# Downsample
|
462 |
+
|
463 |
+
[convolutional]
|
464 |
+
batch_normalize=1
|
465 |
+
filters=640
|
466 |
+
size=3
|
467 |
+
stride=2
|
468 |
+
pad=1
|
469 |
+
activation=silu
|
470 |
+
|
471 |
+
# Split
|
472 |
+
|
473 |
+
[convolutional]
|
474 |
+
batch_normalize=1
|
475 |
+
filters=320
|
476 |
+
size=1
|
477 |
+
stride=1
|
478 |
+
pad=1
|
479 |
+
activation=silu
|
480 |
+
|
481 |
+
[route]
|
482 |
+
layers = -2
|
483 |
+
|
484 |
+
[convolutional]
|
485 |
+
batch_normalize=1
|
486 |
+
filters=320
|
487 |
+
size=1
|
488 |
+
stride=1
|
489 |
+
pad=1
|
490 |
+
activation=silu
|
491 |
+
|
492 |
+
# Residual Block
|
493 |
+
|
494 |
+
[convolutional]
|
495 |
+
batch_normalize=1
|
496 |
+
filters=320
|
497 |
+
size=1
|
498 |
+
stride=1
|
499 |
+
pad=1
|
500 |
+
activation=silu
|
501 |
+
|
502 |
+
[convolutional]
|
503 |
+
batch_normalize=1
|
504 |
+
filters=320
|
505 |
+
size=3
|
506 |
+
stride=1
|
507 |
+
pad=1
|
508 |
+
activation=silu
|
509 |
+
|
510 |
+
[shortcut]
|
511 |
+
from=-3
|
512 |
+
activation=linear
|
513 |
+
|
514 |
+
[convolutional]
|
515 |
+
batch_normalize=1
|
516 |
+
filters=320
|
517 |
+
size=1
|
518 |
+
stride=1
|
519 |
+
pad=1
|
520 |
+
activation=silu
|
521 |
+
|
522 |
+
[convolutional]
|
523 |
+
batch_normalize=1
|
524 |
+
filters=320
|
525 |
+
size=3
|
526 |
+
stride=1
|
527 |
+
pad=1
|
528 |
+
activation=silu
|
529 |
+
|
530 |
+
[shortcut]
|
531 |
+
from=-3
|
532 |
+
activation=linear
|
533 |
+
|
534 |
+
[convolutional]
|
535 |
+
batch_normalize=1
|
536 |
+
filters=320
|
537 |
+
size=1
|
538 |
+
stride=1
|
539 |
+
pad=1
|
540 |
+
activation=silu
|
541 |
+
|
542 |
+
[convolutional]
|
543 |
+
batch_normalize=1
|
544 |
+
filters=320
|
545 |
+
size=3
|
546 |
+
stride=1
|
547 |
+
pad=1
|
548 |
+
activation=silu
|
549 |
+
|
550 |
+
[shortcut]
|
551 |
+
from=-3
|
552 |
+
activation=linear
|
553 |
+
|
554 |
+
[convolutional]
|
555 |
+
batch_normalize=1
|
556 |
+
filters=320
|
557 |
+
size=1
|
558 |
+
stride=1
|
559 |
+
pad=1
|
560 |
+
activation=silu
|
561 |
+
|
562 |
+
[convolutional]
|
563 |
+
batch_normalize=1
|
564 |
+
filters=320
|
565 |
+
size=3
|
566 |
+
stride=1
|
567 |
+
pad=1
|
568 |
+
activation=silu
|
569 |
+
|
570 |
+
[shortcut]
|
571 |
+
from=-3
|
572 |
+
activation=linear
|
573 |
+
|
574 |
+
[convolutional]
|
575 |
+
batch_normalize=1
|
576 |
+
filters=320
|
577 |
+
size=1
|
578 |
+
stride=1
|
579 |
+
pad=1
|
580 |
+
activation=silu
|
581 |
+
|
582 |
+
[convolutional]
|
583 |
+
batch_normalize=1
|
584 |
+
filters=320
|
585 |
+
size=3
|
586 |
+
stride=1
|
587 |
+
pad=1
|
588 |
+
activation=silu
|
589 |
+
|
590 |
+
[shortcut]
|
591 |
+
from=-3
|
592 |
+
activation=linear
|
593 |
+
|
594 |
+
[convolutional]
|
595 |
+
batch_normalize=1
|
596 |
+
filters=320
|
597 |
+
size=1
|
598 |
+
stride=1
|
599 |
+
pad=1
|
600 |
+
activation=silu
|
601 |
+
|
602 |
+
[convolutional]
|
603 |
+
batch_normalize=1
|
604 |
+
filters=320
|
605 |
+
size=3
|
606 |
+
stride=1
|
607 |
+
pad=1
|
608 |
+
activation=silu
|
609 |
+
|
610 |
+
[shortcut]
|
611 |
+
from=-3
|
612 |
+
activation=linear
|
613 |
+
|
614 |
+
[convolutional]
|
615 |
+
batch_normalize=1
|
616 |
+
filters=320
|
617 |
+
size=1
|
618 |
+
stride=1
|
619 |
+
pad=1
|
620 |
+
activation=silu
|
621 |
+
|
622 |
+
[convolutional]
|
623 |
+
batch_normalize=1
|
624 |
+
filters=320
|
625 |
+
size=3
|
626 |
+
stride=1
|
627 |
+
pad=1
|
628 |
+
activation=silu
|
629 |
+
|
630 |
+
[shortcut]
|
631 |
+
from=-3
|
632 |
+
activation=linear
|
633 |
+
|
634 |
+
[convolutional]
|
635 |
+
batch_normalize=1
|
636 |
+
filters=320
|
637 |
+
size=1
|
638 |
+
stride=1
|
639 |
+
pad=1
|
640 |
+
activation=silu
|
641 |
+
|
642 |
+
[convolutional]
|
643 |
+
batch_normalize=1
|
644 |
+
filters=320
|
645 |
+
size=3
|
646 |
+
stride=1
|
647 |
+
pad=1
|
648 |
+
activation=silu
|
649 |
+
|
650 |
+
[shortcut]
|
651 |
+
from=-3
|
652 |
+
activation=linear
|
653 |
+
|
654 |
+
[convolutional]
|
655 |
+
batch_normalize=1
|
656 |
+
filters=320
|
657 |
+
size=1
|
658 |
+
stride=1
|
659 |
+
pad=1
|
660 |
+
activation=silu
|
661 |
+
|
662 |
+
[convolutional]
|
663 |
+
batch_normalize=1
|
664 |
+
filters=320
|
665 |
+
size=3
|
666 |
+
stride=1
|
667 |
+
pad=1
|
668 |
+
activation=silu
|
669 |
+
|
670 |
+
[shortcut]
|
671 |
+
from=-3
|
672 |
+
activation=linear
|
673 |
+
|
674 |
+
[convolutional]
|
675 |
+
batch_normalize=1
|
676 |
+
filters=320
|
677 |
+
size=1
|
678 |
+
stride=1
|
679 |
+
pad=1
|
680 |
+
activation=silu
|
681 |
+
|
682 |
+
[convolutional]
|
683 |
+
batch_normalize=1
|
684 |
+
filters=320
|
685 |
+
size=3
|
686 |
+
stride=1
|
687 |
+
pad=1
|
688 |
+
activation=silu
|
689 |
+
|
690 |
+
[shortcut]
|
691 |
+
from=-3
|
692 |
+
activation=linear
|
693 |
+
|
694 |
+
# Transition first
|
695 |
+
|
696 |
+
[convolutional]
|
697 |
+
batch_normalize=1
|
698 |
+
filters=320
|
699 |
+
size=1
|
700 |
+
stride=1
|
701 |
+
pad=1
|
702 |
+
activation=silu
|
703 |
+
|
704 |
+
# Merge [-1 -(3k+4)]
|
705 |
+
|
706 |
+
[route]
|
707 |
+
layers = -1,-34
|
708 |
+
|
709 |
+
# Transition last
|
710 |
+
|
711 |
+
# 94 (previous+7+3k)
|
712 |
+
[convolutional]
|
713 |
+
batch_normalize=1
|
714 |
+
filters=640
|
715 |
+
size=1
|
716 |
+
stride=1
|
717 |
+
pad=1
|
718 |
+
activation=silu
|
719 |
+
|
720 |
+
# P5
|
721 |
+
|
722 |
+
# Downsample
|
723 |
+
|
724 |
+
[convolutional]
|
725 |
+
batch_normalize=1
|
726 |
+
filters=1280
|
727 |
+
size=3
|
728 |
+
stride=2
|
729 |
+
pad=1
|
730 |
+
activation=silu
|
731 |
+
|
732 |
+
# Split
|
733 |
+
|
734 |
+
[convolutional]
|
735 |
+
batch_normalize=1
|
736 |
+
filters=640
|
737 |
+
size=1
|
738 |
+
stride=1
|
739 |
+
pad=1
|
740 |
+
activation=silu
|
741 |
+
|
742 |
+
[route]
|
743 |
+
layers = -2
|
744 |
+
|
745 |
+
[convolutional]
|
746 |
+
batch_normalize=1
|
747 |
+
filters=640
|
748 |
+
size=1
|
749 |
+
stride=1
|
750 |
+
pad=1
|
751 |
+
activation=silu
|
752 |
+
|
753 |
+
# Residual Block
|
754 |
+
|
755 |
+
[convolutional]
|
756 |
+
batch_normalize=1
|
757 |
+
filters=640
|
758 |
+
size=1
|
759 |
+
stride=1
|
760 |
+
pad=1
|
761 |
+
activation=silu
|
762 |
+
|
763 |
+
[convolutional]
|
764 |
+
batch_normalize=1
|
765 |
+
filters=640
|
766 |
+
size=3
|
767 |
+
stride=1
|
768 |
+
pad=1
|
769 |
+
activation=silu
|
770 |
+
|
771 |
+
[shortcut]
|
772 |
+
from=-3
|
773 |
+
activation=linear
|
774 |
+
|
775 |
+
[convolutional]
|
776 |
+
batch_normalize=1
|
777 |
+
filters=640
|
778 |
+
size=1
|
779 |
+
stride=1
|
780 |
+
pad=1
|
781 |
+
activation=silu
|
782 |
+
|
783 |
+
[convolutional]
|
784 |
+
batch_normalize=1
|
785 |
+
filters=640
|
786 |
+
size=3
|
787 |
+
stride=1
|
788 |
+
pad=1
|
789 |
+
activation=silu
|
790 |
+
|
791 |
+
[shortcut]
|
792 |
+
from=-3
|
793 |
+
activation=linear
|
794 |
+
|
795 |
+
[convolutional]
|
796 |
+
batch_normalize=1
|
797 |
+
filters=640
|
798 |
+
size=1
|
799 |
+
stride=1
|
800 |
+
pad=1
|
801 |
+
activation=silu
|
802 |
+
|
803 |
+
[convolutional]
|
804 |
+
batch_normalize=1
|
805 |
+
filters=640
|
806 |
+
size=3
|
807 |
+
stride=1
|
808 |
+
pad=1
|
809 |
+
activation=silu
|
810 |
+
|
811 |
+
[shortcut]
|
812 |
+
from=-3
|
813 |
+
activation=linear
|
814 |
+
|
815 |
+
[convolutional]
|
816 |
+
batch_normalize=1
|
817 |
+
filters=640
|
818 |
+
size=1
|
819 |
+
stride=1
|
820 |
+
pad=1
|
821 |
+
activation=silu
|
822 |
+
|
823 |
+
[convolutional]
|
824 |
+
batch_normalize=1
|
825 |
+
filters=640
|
826 |
+
size=3
|
827 |
+
stride=1
|
828 |
+
pad=1
|
829 |
+
activation=silu
|
830 |
+
|
831 |
+
[shortcut]
|
832 |
+
from=-3
|
833 |
+
activation=linear
|
834 |
+
|
835 |
+
[convolutional]
|
836 |
+
batch_normalize=1
|
837 |
+
filters=640
|
838 |
+
size=1
|
839 |
+
stride=1
|
840 |
+
pad=1
|
841 |
+
activation=silu
|
842 |
+
|
843 |
+
[convolutional]
|
844 |
+
batch_normalize=1
|
845 |
+
filters=640
|
846 |
+
size=3
|
847 |
+
stride=1
|
848 |
+
pad=1
|
849 |
+
activation=silu
|
850 |
+
|
851 |
+
[shortcut]
|
852 |
+
from=-3
|
853 |
+
activation=linear
|
854 |
+
|
855 |
+
# Transition first
|
856 |
+
|
857 |
+
[convolutional]
|
858 |
+
batch_normalize=1
|
859 |
+
filters=640
|
860 |
+
size=1
|
861 |
+
stride=1
|
862 |
+
pad=1
|
863 |
+
activation=silu
|
864 |
+
|
865 |
+
# Merge [-1 -(3k+4)]
|
866 |
+
|
867 |
+
[route]
|
868 |
+
layers = -1,-19
|
869 |
+
|
870 |
+
# Transition last
|
871 |
+
|
872 |
+
# 116 (previous+7+3k)
|
873 |
+
[convolutional]
|
874 |
+
batch_normalize=1
|
875 |
+
filters=1280
|
876 |
+
size=1
|
877 |
+
stride=1
|
878 |
+
pad=1
|
879 |
+
activation=silu
|
880 |
+
|
881 |
+
# ============ End of Backbone ============ #
|
882 |
+
|
883 |
+
# ============ Neck ============ #
|
884 |
+
|
885 |
+
# CSPSPP
|
886 |
+
|
887 |
+
[convolutional]
|
888 |
+
batch_normalize=1
|
889 |
+
filters=640
|
890 |
+
size=1
|
891 |
+
stride=1
|
892 |
+
pad=1
|
893 |
+
activation=silu
|
894 |
+
|
895 |
+
[route]
|
896 |
+
layers = -2
|
897 |
+
|
898 |
+
[convolutional]
|
899 |
+
batch_normalize=1
|
900 |
+
filters=640
|
901 |
+
size=1
|
902 |
+
stride=1
|
903 |
+
pad=1
|
904 |
+
activation=silu
|
905 |
+
|
906 |
+
[convolutional]
|
907 |
+
batch_normalize=1
|
908 |
+
size=3
|
909 |
+
stride=1
|
910 |
+
pad=1
|
911 |
+
filters=640
|
912 |
+
activation=silu
|
913 |
+
|
914 |
+
[convolutional]
|
915 |
+
batch_normalize=1
|
916 |
+
filters=640
|
917 |
+
size=1
|
918 |
+
stride=1
|
919 |
+
pad=1
|
920 |
+
activation=silu
|
921 |
+
|
922 |
+
### SPP ###
|
923 |
+
[maxpool]
|
924 |
+
stride=1
|
925 |
+
size=5
|
926 |
+
|
927 |
+
[route]
|
928 |
+
layers=-2
|
929 |
+
|
930 |
+
[maxpool]
|
931 |
+
stride=1
|
932 |
+
size=9
|
933 |
+
|
934 |
+
[route]
|
935 |
+
layers=-4
|
936 |
+
|
937 |
+
[maxpool]
|
938 |
+
stride=1
|
939 |
+
size=13
|
940 |
+
|
941 |
+
[route]
|
942 |
+
layers=-1,-3,-5,-6
|
943 |
+
### End SPP ###
|
944 |
+
|
945 |
+
[convolutional]
|
946 |
+
batch_normalize=1
|
947 |
+
filters=640
|
948 |
+
size=1
|
949 |
+
stride=1
|
950 |
+
pad=1
|
951 |
+
activation=silu
|
952 |
+
|
953 |
+
[convolutional]
|
954 |
+
batch_normalize=1
|
955 |
+
size=3
|
956 |
+
stride=1
|
957 |
+
pad=1
|
958 |
+
filters=640
|
959 |
+
activation=silu
|
960 |
+
|
961 |
+
[convolutional]
|
962 |
+
batch_normalize=1
|
963 |
+
filters=640
|
964 |
+
size=1
|
965 |
+
stride=1
|
966 |
+
pad=1
|
967 |
+
activation=silu
|
968 |
+
|
969 |
+
[convolutional]
|
970 |
+
batch_normalize=1
|
971 |
+
size=3
|
972 |
+
stride=1
|
973 |
+
pad=1
|
974 |
+
filters=640
|
975 |
+
activation=silu
|
976 |
+
|
977 |
+
[route]
|
978 |
+
layers = -1, -15
|
979 |
+
|
980 |
+
# 133 (previous+6+5+2k)
|
981 |
+
[convolutional]
|
982 |
+
batch_normalize=1
|
983 |
+
filters=640
|
984 |
+
size=1
|
985 |
+
stride=1
|
986 |
+
pad=1
|
987 |
+
activation=silu
|
988 |
+
|
989 |
+
# End of CSPSPP
|
990 |
+
|
991 |
+
|
992 |
+
# FPN-4
|
993 |
+
|
994 |
+
[convolutional]
|
995 |
+
batch_normalize=1
|
996 |
+
filters=320
|
997 |
+
size=1
|
998 |
+
stride=1
|
999 |
+
pad=1
|
1000 |
+
activation=silu
|
1001 |
+
|
1002 |
+
[upsample]
|
1003 |
+
stride=2
|
1004 |
+
|
1005 |
+
[route]
|
1006 |
+
layers = 94
|
1007 |
+
|
1008 |
+
[convolutional]
|
1009 |
+
batch_normalize=1
|
1010 |
+
filters=320
|
1011 |
+
size=1
|
1012 |
+
stride=1
|
1013 |
+
pad=1
|
1014 |
+
activation=silu
|
1015 |
+
|
1016 |
+
[route]
|
1017 |
+
layers = -1, -3
|
1018 |
+
|
1019 |
+
[convolutional]
|
1020 |
+
batch_normalize=1
|
1021 |
+
filters=320
|
1022 |
+
size=1
|
1023 |
+
stride=1
|
1024 |
+
pad=1
|
1025 |
+
activation=silu
|
1026 |
+
|
1027 |
+
# Split
|
1028 |
+
|
1029 |
+
[convolutional]
|
1030 |
+
batch_normalize=1
|
1031 |
+
filters=320
|
1032 |
+
size=1
|
1033 |
+
stride=1
|
1034 |
+
pad=1
|
1035 |
+
activation=silu
|
1036 |
+
|
1037 |
+
[route]
|
1038 |
+
layers = -2
|
1039 |
+
|
1040 |
+
# Plain Block
|
1041 |
+
|
1042 |
+
[convolutional]
|
1043 |
+
batch_normalize=1
|
1044 |
+
filters=320
|
1045 |
+
size=1
|
1046 |
+
stride=1
|
1047 |
+
pad=1
|
1048 |
+
activation=silu
|
1049 |
+
|
1050 |
+
[convolutional]
|
1051 |
+
batch_normalize=1
|
1052 |
+
size=3
|
1053 |
+
stride=1
|
1054 |
+
pad=1
|
1055 |
+
filters=320
|
1056 |
+
activation=silu
|
1057 |
+
|
1058 |
+
[convolutional]
|
1059 |
+
batch_normalize=1
|
1060 |
+
filters=320
|
1061 |
+
size=1
|
1062 |
+
stride=1
|
1063 |
+
pad=1
|
1064 |
+
activation=silu
|
1065 |
+
|
1066 |
+
[convolutional]
|
1067 |
+
batch_normalize=1
|
1068 |
+
size=3
|
1069 |
+
stride=1
|
1070 |
+
pad=1
|
1071 |
+
filters=320
|
1072 |
+
activation=silu
|
1073 |
+
|
1074 |
+
[convolutional]
|
1075 |
+
batch_normalize=1
|
1076 |
+
filters=320
|
1077 |
+
size=1
|
1078 |
+
stride=1
|
1079 |
+
pad=1
|
1080 |
+
activation=silu
|
1081 |
+
|
1082 |
+
[convolutional]
|
1083 |
+
batch_normalize=1
|
1084 |
+
size=3
|
1085 |
+
stride=1
|
1086 |
+
pad=1
|
1087 |
+
filters=320
|
1088 |
+
activation=silu
|
1089 |
+
|
1090 |
+
# Merge [-1, -(2k+2)]
|
1091 |
+
|
1092 |
+
[route]
|
1093 |
+
layers = -1, -8
|
1094 |
+
|
1095 |
+
# Transition last
|
1096 |
+
|
1097 |
+
# 149 (previous+6+4+2k)
|
1098 |
+
[convolutional]
|
1099 |
+
batch_normalize=1
|
1100 |
+
filters=320
|
1101 |
+
size=1
|
1102 |
+
stride=1
|
1103 |
+
pad=1
|
1104 |
+
activation=silu
|
1105 |
+
|
1106 |
+
|
1107 |
+
# FPN-3
|
1108 |
+
|
1109 |
+
[convolutional]
|
1110 |
+
batch_normalize=1
|
1111 |
+
filters=160
|
1112 |
+
size=1
|
1113 |
+
stride=1
|
1114 |
+
pad=1
|
1115 |
+
activation=silu
|
1116 |
+
|
1117 |
+
[upsample]
|
1118 |
+
stride=2
|
1119 |
+
|
1120 |
+
[route]
|
1121 |
+
layers = 57
|
1122 |
+
|
1123 |
+
[convolutional]
|
1124 |
+
batch_normalize=1
|
1125 |
+
filters=160
|
1126 |
+
size=1
|
1127 |
+
stride=1
|
1128 |
+
pad=1
|
1129 |
+
activation=silu
|
1130 |
+
|
1131 |
+
[route]
|
1132 |
+
layers = -1, -3
|
1133 |
+
|
1134 |
+
[convolutional]
|
1135 |
+
batch_normalize=1
|
1136 |
+
filters=160
|
1137 |
+
size=1
|
1138 |
+
stride=1
|
1139 |
+
pad=1
|
1140 |
+
activation=silu
|
1141 |
+
|
1142 |
+
# Split
|
1143 |
+
|
1144 |
+
[convolutional]
|
1145 |
+
batch_normalize=1
|
1146 |
+
filters=160
|
1147 |
+
size=1
|
1148 |
+
stride=1
|
1149 |
+
pad=1
|
1150 |
+
activation=silu
|
1151 |
+
|
1152 |
+
[route]
|
1153 |
+
layers = -2
|
1154 |
+
|
1155 |
+
# Plain Block
|
1156 |
+
|
1157 |
+
[convolutional]
|
1158 |
+
batch_normalize=1
|
1159 |
+
filters=160
|
1160 |
+
size=1
|
1161 |
+
stride=1
|
1162 |
+
pad=1
|
1163 |
+
activation=silu
|
1164 |
+
|
1165 |
+
[convolutional]
|
1166 |
+
batch_normalize=1
|
1167 |
+
size=3
|
1168 |
+
stride=1
|
1169 |
+
pad=1
|
1170 |
+
filters=160
|
1171 |
+
activation=silu
|
1172 |
+
|
1173 |
+
[convolutional]
|
1174 |
+
batch_normalize=1
|
1175 |
+
filters=160
|
1176 |
+
size=1
|
1177 |
+
stride=1
|
1178 |
+
pad=1
|
1179 |
+
activation=silu
|
1180 |
+
|
1181 |
+
[convolutional]
|
1182 |
+
batch_normalize=1
|
1183 |
+
size=3
|
1184 |
+
stride=1
|
1185 |
+
pad=1
|
1186 |
+
filters=160
|
1187 |
+
activation=silu
|
1188 |
+
|
1189 |
+
[convolutional]
|
1190 |
+
batch_normalize=1
|
1191 |
+
filters=160
|
1192 |
+
size=1
|
1193 |
+
stride=1
|
1194 |
+
pad=1
|
1195 |
+
activation=silu
|
1196 |
+
|
1197 |
+
[convolutional]
|
1198 |
+
batch_normalize=1
|
1199 |
+
size=3
|
1200 |
+
stride=1
|
1201 |
+
pad=1
|
1202 |
+
filters=160
|
1203 |
+
activation=silu
|
1204 |
+
|
1205 |
+
# Merge [-1, -(2k+2)]
|
1206 |
+
|
1207 |
+
[route]
|
1208 |
+
layers = -1, -8
|
1209 |
+
|
1210 |
+
# Transition last
|
1211 |
+
|
1212 |
+
# 165 (previous+6+4+2k)
|
1213 |
+
[convolutional]
|
1214 |
+
batch_normalize=1
|
1215 |
+
filters=160
|
1216 |
+
size=1
|
1217 |
+
stride=1
|
1218 |
+
pad=1
|
1219 |
+
activation=silu
|
1220 |
+
|
1221 |
+
|
1222 |
+
# PAN-4
|
1223 |
+
|
1224 |
+
[convolutional]
|
1225 |
+
batch_normalize=1
|
1226 |
+
size=3
|
1227 |
+
stride=2
|
1228 |
+
pad=1
|
1229 |
+
filters=320
|
1230 |
+
activation=silu
|
1231 |
+
|
1232 |
+
[route]
|
1233 |
+
layers = -1, 149
|
1234 |
+
|
1235 |
+
[convolutional]
|
1236 |
+
batch_normalize=1
|
1237 |
+
filters=320
|
1238 |
+
size=1
|
1239 |
+
stride=1
|
1240 |
+
pad=1
|
1241 |
+
activation=silu
|
1242 |
+
|
1243 |
+
# Split
|
1244 |
+
|
1245 |
+
[convolutional]
|
1246 |
+
batch_normalize=1
|
1247 |
+
filters=320
|
1248 |
+
size=1
|
1249 |
+
stride=1
|
1250 |
+
pad=1
|
1251 |
+
activation=silu
|
1252 |
+
|
1253 |
+
[route]
|
1254 |
+
layers = -2
|
1255 |
+
|
1256 |
+
# Plain Block
|
1257 |
+
|
1258 |
+
[convolutional]
|
1259 |
+
batch_normalize=1
|
1260 |
+
filters=320
|
1261 |
+
size=1
|
1262 |
+
stride=1
|
1263 |
+
pad=1
|
1264 |
+
activation=silu
|
1265 |
+
|
1266 |
+
[convolutional]
|
1267 |
+
batch_normalize=1
|
1268 |
+
size=3
|
1269 |
+
stride=1
|
1270 |
+
pad=1
|
1271 |
+
filters=320
|
1272 |
+
activation=silu
|
1273 |
+
|
1274 |
+
[convolutional]
|
1275 |
+
batch_normalize=1
|
1276 |
+
filters=320
|
1277 |
+
size=1
|
1278 |
+
stride=1
|
1279 |
+
pad=1
|
1280 |
+
activation=silu
|
1281 |
+
|
1282 |
+
[convolutional]
|
1283 |
+
batch_normalize=1
|
1284 |
+
size=3
|
1285 |
+
stride=1
|
1286 |
+
pad=1
|
1287 |
+
filters=320
|
1288 |
+
activation=silu
|
1289 |
+
|
1290 |
+
[convolutional]
|
1291 |
+
batch_normalize=1
|
1292 |
+
filters=320
|
1293 |
+
size=1
|
1294 |
+
stride=1
|
1295 |
+
pad=1
|
1296 |
+
activation=silu
|
1297 |
+
|
1298 |
+
[convolutional]
|
1299 |
+
batch_normalize=1
|
1300 |
+
size=3
|
1301 |
+
stride=1
|
1302 |
+
pad=1
|
1303 |
+
filters=320
|
1304 |
+
activation=silu
|
1305 |
+
|
1306 |
+
[route]
|
1307 |
+
layers = -1,-8
|
1308 |
+
|
1309 |
+
# Transition last
|
1310 |
+
|
1311 |
+
# 178 (previous+3+4+2k)
|
1312 |
+
[convolutional]
|
1313 |
+
batch_normalize=1
|
1314 |
+
filters=320
|
1315 |
+
size=1
|
1316 |
+
stride=1
|
1317 |
+
pad=1
|
1318 |
+
activation=silu
|
1319 |
+
|
1320 |
+
|
1321 |
+
# PAN-5
|
1322 |
+
|
1323 |
+
[convolutional]
|
1324 |
+
batch_normalize=1
|
1325 |
+
size=3
|
1326 |
+
stride=2
|
1327 |
+
pad=1
|
1328 |
+
filters=640
|
1329 |
+
activation=silu
|
1330 |
+
|
1331 |
+
[route]
|
1332 |
+
layers = -1, 133
|
1333 |
+
|
1334 |
+
[convolutional]
|
1335 |
+
batch_normalize=1
|
1336 |
+
filters=640
|
1337 |
+
size=1
|
1338 |
+
stride=1
|
1339 |
+
pad=1
|
1340 |
+
activation=silu
|
1341 |
+
|
1342 |
+
# Split
|
1343 |
+
|
1344 |
+
[convolutional]
|
1345 |
+
batch_normalize=1
|
1346 |
+
filters=640
|
1347 |
+
size=1
|
1348 |
+
stride=1
|
1349 |
+
pad=1
|
1350 |
+
activation=silu
|
1351 |
+
|
1352 |
+
[route]
|
1353 |
+
layers = -2
|
1354 |
+
|
1355 |
+
# Plain Block
|
1356 |
+
|
1357 |
+
[convolutional]
|
1358 |
+
batch_normalize=1
|
1359 |
+
filters=640
|
1360 |
+
size=1
|
1361 |
+
stride=1
|
1362 |
+
pad=1
|
1363 |
+
activation=silu
|
1364 |
+
|
1365 |
+
[convolutional]
|
1366 |
+
batch_normalize=1
|
1367 |
+
size=3
|
1368 |
+
stride=1
|
1369 |
+
pad=1
|
1370 |
+
filters=640
|
1371 |
+
activation=silu
|
1372 |
+
|
1373 |
+
[convolutional]
|
1374 |
+
batch_normalize=1
|
1375 |
+
filters=640
|
1376 |
+
size=1
|
1377 |
+
stride=1
|
1378 |
+
pad=1
|
1379 |
+
activation=silu
|
1380 |
+
|
1381 |
+
[convolutional]
|
1382 |
+
batch_normalize=1
|
1383 |
+
size=3
|
1384 |
+
stride=1
|
1385 |
+
pad=1
|
1386 |
+
filters=640
|
1387 |
+
activation=silu
|
1388 |
+
|
1389 |
+
[convolutional]
|
1390 |
+
batch_normalize=1
|
1391 |
+
filters=640
|
1392 |
+
size=1
|
1393 |
+
stride=1
|
1394 |
+
pad=1
|
1395 |
+
activation=silu
|
1396 |
+
|
1397 |
+
[convolutional]
|
1398 |
+
batch_normalize=1
|
1399 |
+
size=3
|
1400 |
+
stride=1
|
1401 |
+
pad=1
|
1402 |
+
filters=640
|
1403 |
+
activation=silu
|
1404 |
+
|
1405 |
+
[route]
|
1406 |
+
layers = -1,-8
|
1407 |
+
|
1408 |
+
# Transition last
|
1409 |
+
|
1410 |
+
# 191 (previous+3+4+2k)
|
1411 |
+
[convolutional]
|
1412 |
+
batch_normalize=1
|
1413 |
+
filters=640
|
1414 |
+
size=1
|
1415 |
+
stride=1
|
1416 |
+
pad=1
|
1417 |
+
activation=silu
|
1418 |
+
|
1419 |
+
# ============ End of Neck ============ #
|
1420 |
+
|
1421 |
+
# 192
|
1422 |
+
[implicit_add]
|
1423 |
+
filters=320
|
1424 |
+
|
1425 |
+
# 193
|
1426 |
+
[implicit_add]
|
1427 |
+
filters=640
|
1428 |
+
|
1429 |
+
# 194
|
1430 |
+
[implicit_add]
|
1431 |
+
filters=1280
|
1432 |
+
|
1433 |
+
# 195
|
1434 |
+
[implicit_mul]
|
1435 |
+
filters=255
|
1436 |
+
|
1437 |
+
# 196
|
1438 |
+
[implicit_mul]
|
1439 |
+
filters=255
|
1440 |
+
|
1441 |
+
# 197
|
1442 |
+
[implicit_mul]
|
1443 |
+
filters=255
|
1444 |
+
|
1445 |
+
# ============ Head ============ #
|
1446 |
+
|
1447 |
+
# YOLO-3
|
1448 |
+
|
1449 |
+
[route]
|
1450 |
+
layers = 165
|
1451 |
+
|
1452 |
+
[convolutional]
|
1453 |
+
batch_normalize=1
|
1454 |
+
size=3
|
1455 |
+
stride=1
|
1456 |
+
pad=1
|
1457 |
+
filters=320
|
1458 |
+
activation=silu
|
1459 |
+
|
1460 |
+
[shift_channels]
|
1461 |
+
from=192
|
1462 |
+
|
1463 |
+
[convolutional]
|
1464 |
+
size=1
|
1465 |
+
stride=1
|
1466 |
+
pad=1
|
1467 |
+
filters=255
|
1468 |
+
activation=linear
|
1469 |
+
|
1470 |
+
[control_channels]
|
1471 |
+
from=195
|
1472 |
+
|
1473 |
+
[yolo]
|
1474 |
+
mask = 0,1,2
|
1475 |
+
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
|
1476 |
+
classes=80
|
1477 |
+
num=9
|
1478 |
+
jitter=.3
|
1479 |
+
ignore_thresh = .7
|
1480 |
+
truth_thresh = 1
|
1481 |
+
random=1
|
1482 |
+
scale_x_y = 1.05
|
1483 |
+
iou_thresh=0.213
|
1484 |
+
cls_normalizer=1.0
|
1485 |
+
iou_normalizer=0.07
|
1486 |
+
iou_loss=ciou
|
1487 |
+
nms_kind=greedynms
|
1488 |
+
beta_nms=0.6
|
1489 |
+
|
1490 |
+
|
1491 |
+
# YOLO-4
|
1492 |
+
|
1493 |
+
[route]
|
1494 |
+
layers = 178
|
1495 |
+
|
1496 |
+
[convolutional]
|
1497 |
+
batch_normalize=1
|
1498 |
+
size=3
|
1499 |
+
stride=1
|
1500 |
+
pad=1
|
1501 |
+
filters=640
|
1502 |
+
activation=silu
|
1503 |
+
|
1504 |
+
[shift_channels]
|
1505 |
+
from=193
|
1506 |
+
|
1507 |
+
[convolutional]
|
1508 |
+
size=1
|
1509 |
+
stride=1
|
1510 |
+
pad=1
|
1511 |
+
filters=255
|
1512 |
+
activation=linear
|
1513 |
+
|
1514 |
+
[control_channels]
|
1515 |
+
from=196
|
1516 |
+
|
1517 |
+
[yolo]
|
1518 |
+
mask = 3,4,5
|
1519 |
+
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
|
1520 |
+
classes=80
|
1521 |
+
num=9
|
1522 |
+
jitter=.3
|
1523 |
+
ignore_thresh = .7
|
1524 |
+
truth_thresh = 1
|
1525 |
+
random=1
|
1526 |
+
scale_x_y = 1.05
|
1527 |
+
iou_thresh=0.213
|
1528 |
+
cls_normalizer=1.0
|
1529 |
+
iou_normalizer=0.07
|
1530 |
+
iou_loss=ciou
|
1531 |
+
nms_kind=greedynms
|
1532 |
+
beta_nms=0.6
|
1533 |
+
|
1534 |
+
|
1535 |
+
# YOLO-5
|
1536 |
+
|
1537 |
+
[route]
|
1538 |
+
layers = 191
|
1539 |
+
|
1540 |
+
[convolutional]
|
1541 |
+
batch_normalize=1
|
1542 |
+
size=3
|
1543 |
+
stride=1
|
1544 |
+
pad=1
|
1545 |
+
filters=1280
|
1546 |
+
activation=silu
|
1547 |
+
|
1548 |
+
[shift_channels]
|
1549 |
+
from=194
|
1550 |
+
|
1551 |
+
[convolutional]
|
1552 |
+
size=1
|
1553 |
+
stride=1
|
1554 |
+
pad=1
|
1555 |
+
filters=255
|
1556 |
+
activation=linear
|
1557 |
+
|
1558 |
+
[control_channels]
|
1559 |
+
from=197
|
1560 |
+
|
1561 |
+
[yolo]
|
1562 |
+
mask = 6,7,8
|
1563 |
+
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
|
1564 |
+
classes=80
|
1565 |
+
num=9
|
1566 |
+
jitter=.3
|
1567 |
+
ignore_thresh = .7
|
1568 |
+
truth_thresh = 1
|
1569 |
+
random=1
|
1570 |
+
scale_x_y = 1.05
|
1571 |
+
iou_thresh=0.213
|
1572 |
+
cls_normalizer=1.0
|
1573 |
+
iou_normalizer=0.07
|
1574 |
+
iou_loss=ciou
|
1575 |
+
nms_kind=greedynms
|
1576 |
+
beta_nms=0.6
|
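For reference, all three [yolo] heads in these cfgs share the same nine anchors and each selects three of them per detection scale via mask (0,1,2 for the smallest-object head, 3,4,5 for the middle one, 6,7,8 for the largest). A small, self-contained sketch of that selection, using the anchor values copied from the config above (the helper name is illustrative, not from this commit):

# Anchor list exactly as written in the [yolo] sections above.
ANCHORS = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]

def anchors_for_mask(mask, anchors=ANCHORS):
    """Return the (w, h) anchor pairs a [yolo] layer selects with its mask."""
    pairs = list(zip(anchors[0::2], anchors[1::2]))   # nine (w, h) pairs
    return [pairs[i] for i in mask]

print(anchors_for_mask([0, 1, 2]))   # [(12, 16), (19, 36), (40, 28)]
print(anchors_for_mask([6, 7, 8]))   # [(142, 110), (192, 243), (459, 401)]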
asone/detectors/yolor/cfg/yolor_p6.cfg
ADDED
@@ -0,0 +1,1760 @@
+[net]
+batch=64
+subdivisions=8
+width=1280
+height=1280
+channels=3
+momentum=0.949
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.00261
+burn_in=1000
+max_batches = 500500
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+mosaic=1
+
+
+# ============ Backbone ============ #
+
+# Stem
+
+# P1
+
+# Downsample
+
+# 0
+[reorg]
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=silu
+
+
+# P2
+
+# Downsample
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=2
+pad=1
+activation=silu
+
+# Split
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=silu
+
+# Residual Block
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+# Transition first
+#
+#[convolutional]
+#batch_normalize=1
+#filters=64
+#size=1
+#stride=1
+#pad=1
+#activation=silu
+
+# Merge [-1, -(3k+3)]
+
+[route]
+layers = -1,-12
+
+# Transition last
+
+# 16 (previous+6+3k)
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+
+# P3
+
+# Downsample
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=2
+pad=1
+activation=silu
+
+# Split
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+# Residual Block
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+# Transition first
+#
+#[convolutional]
+#batch_normalize=1
+#filters=128
+#size=1
+#stride=1
+#pad=1
+#activation=silu
+
+# Merge [-1, -(3k+3)]
+
+[route]
+layers = -1,-24
+
+# Transition last
+
+# 43 (previous+6+3k)
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+
+# P4
+
+# Downsample
+
+[convolutional]
+batch_normalize=1
+filters=384
+size=3
+stride=2
+pad=1
+activation=silu
+
+# Split
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+# Residual Block
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+# Transition first
+#
+#[convolutional]
+#batch_normalize=1
+#filters=192
+#size=1
+#stride=1
+#pad=1
+#activation=silu
+
+# Merge [-1, -(3k+3)]
+
+[route]
+layers = -1,-24
+
+# Transition last
+
+# 70 (previous+6+3k)
+[convolutional]
+batch_normalize=1
+filters=384
+size=1
+stride=1
+pad=1
+activation=silu
+
+
+# P5
+
+# Downsample
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=2
+pad=1
+activation=silu
+
+# Split
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+# Residual Block
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+# Transition first
+#
+#[convolutional]
+#batch_normalize=1
+#filters=256
+#size=1
+#stride=1
+#pad=1
+#activation=silu
+
+# Merge [-1, -(3k+3)]
+
+[route]
+layers = -1,-12
+
+# Transition last
+
+# 85 (previous+6+3k)
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=silu
+
+
+# P6
+
+# Downsample
+
+[convolutional]
+batch_normalize=1
+filters=640
+size=3
+stride=2
+pad=1
+activation=silu
+
+# Split
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+# Residual Block
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=3
+stride=1
+pad=1
+activation=silu
+
+[shortcut]
+from=-3
+activation=linear
+
+# Transition first
+#
+#[convolutional]
+#batch_normalize=1
+#filters=320
+#size=1
+#stride=1
+#pad=1
+#activation=silu
+
+# Merge [-1, -(3k+3)]
+
+[route]
+layers = -1,-12
+
+# Transition last
+
+# 100 (previous+6+3k)
+[convolutional]
+batch_normalize=1
+filters=640
+size=1
+stride=1
+pad=1
+activation=silu
+
+# ============ End of Backbone ============ #
+
+# ============ Neck ============ #
+
+# CSPSPP
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=320
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+### SPP ###
+[maxpool]
+stride=1
+size=5
+
+[route]
+layers=-2
+
+[maxpool]
+stride=1
+size=9
+
+[route]
+layers=-4
+
+[maxpool]
+stride=1
+size=13
+
+[route]
+layers=-1,-3,-5,-6
+### End SPP ###
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=320
+activation=silu
+
+[route]
+layers = -1, -13
+
+# 115 (previous+6+5+2k)
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+# End of CSPSPP
+
+
+# FPN-5
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[upsample]
+stride=2
+
+[route]
+layers = 85
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -1, -3
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+# Split
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+# Plain Block
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=silu
+
+# Merge [-1, -(2k+2)]
+
+[route]
+layers = -1, -8
+
+# Transition last
+
+# 131 (previous+6+4+2k)
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+
+# FPN-4
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[upsample]
+stride=2
+
+[route]
+layers = 70
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -1, -3
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+# Split
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+# Plain Block
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=192
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=192
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=192
+activation=silu
+
+# Merge [-1, -(2k+2)]
+
+[route]
+layers = -1, -8
+
+# Transition last
+
+# 147 (previous+6+4+2k)
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+
+# FPN-3
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[upsample]
+stride=2
+
+[route]
+layers = 43
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -1, -3
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+# Split
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+# Plain Block
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=128
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=128
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=128
+activation=silu
+
+# Merge [-1, -(2k+2)]
+
+[route]
+layers = -1, -8
+
+# Transition last
+
+# 163 (previous+6+4+2k)
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=silu
+
+
+# PAN-4
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=2
+pad=1
+filters=192
+activation=silu
+
+[route]
+layers = -1, 147
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+# Split
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+# Plain Block
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=192
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=192
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=192
+activation=silu
+
+[route]
+layers = -1,-8
+
+# Transition last
+
+# 176 (previous+3+4+2k)
+[convolutional]
+batch_normalize=1
+filters=192
+size=1
+stride=1
+pad=1
+activation=silu
+
+
+# PAN-5
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=2
+pad=1
+filters=256
+activation=silu
+
+[route]
+layers = -1, 131
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+# Split
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+# Plain Block
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=silu
+
+[route]
+layers = -1,-8
+
+# Transition last
+
+# 189 (previous+3+4+2k)
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=silu
+
+
+# PAN-6
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=2
+pad=1
+filters=320
+activation=silu
+
+[route]
+layers = -1, 115
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+# Split
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+[route]
+layers = -2
+
+# Plain Block
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=320
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=320
+activation=silu
+
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=320
+activation=silu
+
+[route]
+layers = -1,-8
+
+# Transition last
+
+# 202 (previous+3+4+2k)
+[convolutional]
+batch_normalize=1
+filters=320
+size=1
+stride=1
+pad=1
+activation=silu
+
+# ============ End of Neck ============ #
+
+# 203
+[implicit_add]
+filters=256
+
+# 204
+[implicit_add]
+filters=384
+
+# 205
+[implicit_add]
+filters=512
+
+# 206
+[implicit_add]
+filters=640
+
+# 207
+[implicit_mul]
+filters=255
+
+# 208
+[implicit_mul]
+filters=255
+
+# 209
+[implicit_mul]
+filters=255
+
+# 210
+[implicit_mul]
+filters=255
+
+# ============ Head ============ #
+
+# YOLO-3
+
+[route]
+layers = 163
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=silu
+
+[shift_channels]
+from=203
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+
+[control_channels]
+from=207
+
+[yolo]
+mask = 0,1,2
+anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
+classes=80
+num=12
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+scale_x_y = 1.05
+iou_thresh=0.213
+cls_normalizer=1.0
+iou_normalizer=0.07
+iou_loss=ciou
+nms_kind=greedynms
+beta_nms=0.6
+
+
+# YOLO-4
+
+[route]
+layers = 176
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=384
+activation=silu
+
+[shift_channels]
+from=204
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+
+[control_channels]
+from=208
+
+[yolo]
+mask = 3,4,5
+anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
+classes=80
+num=12
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+scale_x_y = 1.05
+iou_thresh=0.213
+cls_normalizer=1.0
+iou_normalizer=0.07
+iou_loss=ciou
+nms_kind=greedynms
+beta_nms=0.6
+
+
+# YOLO-5
+
+[route]
+layers = 189
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=512
+activation=silu
+
+[shift_channels]
+from=205
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+
+[control_channels]
+from=209
+
+[yolo]
+mask = 6,7,8
+anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
+classes=80
+num=12
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+scale_x_y = 1.05
+iou_thresh=0.213
+cls_normalizer=1.0
+iou_normalizer=0.07
+iou_loss=ciou
+nms_kind=greedynms
+beta_nms=0.6
+
+
+# YOLO-6
+
+[route]
+layers = 202
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=640
+activation=silu
+
+[shift_channels]
+from=206
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=255
+activation=linear
+
+[control_channels]
+from=210
+
+[yolo]
+mask = 9,10,11
+anchors = 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
+classes=80
+num=12
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+scale_x_y = 1.05
+iou_thresh=0.213
+cls_normalizer=1.0
+iou_normalizer=0.07
+iou_loss=ciou
+nms_kind=greedynms
+beta_nms=0.6
+
+# ============ End of Head ============ #
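(Editor's note, not part of the commit: the cfg files above are Darknet-style model definitions; the package's own builder in asone/detectors/yolor/models/models.py is expected to parse them. The sketch below is a minimal, untested smoke test; the Darknet class name and its (cfg, img_size) constructor signature are assumptions based on upstream YOLOR, and the 1280x1280 input size simply mirrors the width/height in the [net] block above.)

import torch
from asone.detectors.yolor.models.models import Darknet  # assumed import path

cfg_path = "asone/detectors/yolor/cfg/yolor_p6.cfg"
model = Darknet(cfg_path, img_size=(1280, 1280))  # build layers from the cfg blocks
model.eval()

dummy = torch.zeros(1, 3, 1280, 1280)  # channels=3 per the [net] block
with torch.no_grad():
    out = model(dummy)  # forward pass through backbone, neck and the four [yolo] heads
print(type(out))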
asone/detectors/yolor/models/__init__.py
ADDED
@@ -0,0 +1 @@
+
asone/detectors/yolor/models/common.py
ADDED
@@ -0,0 +1,1023 @@
+# This file contains modules common to various models
+
+import math
+
+import numpy as np
+import torch
+import torch.nn as nn
+from PIL import Image, ImageDraw
+
+from asone.detectors.yolor.utils.datasets import letterbox
+from asone.detectors.yolor.utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
+from asone.detectors.yolor.utils.plots import color_list
+
+try:
+    from pytorch_wavelets import DWTForward, DWTInverse
+
+    class DWT(nn.Module):
+        def __init__(self):
+            super(DWT, self).__init__()
+            self.xfm = DWTForward(J=1, wave='db1', mode='zero')
+
+        def forward(self, x):
+            b,c,w,h = x.shape
+            yl, yh = self.xfm(x)
+            return torch.cat([yl/2., yh[0].view(b,-1,w//2,h//2)/2.+.5], 1)
+except:
+
+    class DWT(nn.Module):  # use ReOrg instead
+        def __init__(self):
+            super(DWT, self).__init__()
+
+        def forward(self, x):
+            return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
+
+
+class ImplicitA(nn.Module):
+    def __init__(self, channel):
+        super(ImplicitA, self).__init__()
+        self.channel = channel
+        self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1))
+        nn.init.normal_(self.implicit, std=.02)
+
+    def forward(self, x):
+        return self.implicit.expand_as(x) + x
+
+
+class ImplicitM(nn.Module):
+    def __init__(self, channel):
+        super(ImplicitM, self).__init__()
+        self.channel = channel
+        self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1))
+        nn.init.normal_(self.implicit, mean=1., std=.02)
+
+    def forward(self, x):
+        return self.implicit.expand_as(x) * x
+
+
+class ReOrg(nn.Module):
+    def __init__(self):
+        super(ReOrg, self).__init__()
+
+    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
+        return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
+
+def autopad(k, p=None):  # kernel, padding
+    # Pad to 'same'
+    if p is None:
+        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
+    return p
+
+
+def DWConv(c1, c2, k=1, s=1, act=True):
+    # Depthwise convolution
+    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
+
+
+class Conv(nn.Module):
+    # Standard convolution
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+        super(Conv, self).__init__()
+        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+        self.bn = nn.BatchNorm2d(c2)
+        self.act = nn.SiLU() if act else nn.Identity()
+
+    def forward(self, x):
+        return self.act(self.bn(self.conv(x)))
+
+    def fuseforward(self, x):
+        return self.act(self.conv(x))
+
+
+class ConvSig(nn.Module):
+    # Standard convolution
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+        super(ConvSig, self).__init__()
+        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+        self.act = nn.Sigmoid() if act else nn.Identity()
+
+    def forward(self, x):
+        return self.act(self.conv(x))
+
+    def fuseforward(self, x):
+        return self.act(self.conv(x))
+
+
+class ConvSqu(nn.Module):
+    # Standard convolution
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+        super(ConvSqu, self).__init__()
+        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+        self.act = nn.SiLU() if act else nn.Identity()
+
+    def forward(self, x):
+        return self.act(self.conv(x))
+
+    def fuseforward(self, x):
+        return self.act(self.conv(x))
+
+
+class Bottleneck(nn.Module):
+    # Standard bottleneck
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
+        super(Bottleneck, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = Conv(c_, c2, 3, 1, g=g)
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+class BottleneckG(nn.Module):
+    # Standard bottleneck
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
+        super(BottleneckG, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1, g=g)
+        self.cv2 = Conv(c_, c2, 3, 1, g=g)
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+class BottleneckCSP(nn.Module):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(BottleneckCSP, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+        self.cv4 = Conv(2 * c_, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+        self.act = nn.SiLU()
+        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
+
+    def forward(self, x):
+        y1 = self.cv3(self.m(self.cv1(x)))
+        y2 = self.cv2(x)
+        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+
+class BottleneckCSPF(nn.Module):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(BottleneckCSPF, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+        #self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+        self.cv4 = Conv(2 * c_, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+        self.act = nn.SiLU()
+        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
+
+    def forward(self, x):
+        y1 = self.m(self.cv1(x))
+        y2 = self.cv2(x)
+        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+
+class BottleneckCSPL(nn.Module):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(BottleneckCSPL, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+        #self.cv4 = Conv(2 * c_, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+        self.act = nn.SiLU()
+        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
+
+    def forward(self, x):
+        y1 = self.cv3(self.m(self.cv1(x)))
+        y2 = self.cv2(x)
+        return self.act(self.bn(torch.cat((y1, y2), dim=1)))
+
+
+class BottleneckCSPLG(nn.Module):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    def __init__(self, c1, c2, n=1, shortcut=True, g=3, e=0.25):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(BottleneckCSPLG, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, g*c_, 1, 1)
+        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(g*c_, g*c_, 1, 1, groups=g, bias=False)
+        #self.cv4 = Conv(2 * c_, c2, 1, 1)
+        self.bn = nn.BatchNorm2d((1+g) * c_)  # applied to cat(cv2, cv3)
+        self.act = nn.SiLU()
+        self.m = nn.Sequential(*[BottleneckG(g*c_, g*c_, shortcut, g, e=1.0) for _ in range(n)])
+
+    def forward(self, x):
+        y1 = self.cv3(self.m(self.cv1(x)))
+        y2 = self.cv2(x)
+        return self.act(self.bn(torch.cat((y1, y2), dim=1)))
+
+
+class BottleneckCSPSE(nn.Module):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(BottleneckCSPSE, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.cs = ConvSqu(c1, c1//8, 1, 1)
+        self.cvsig = ConvSig(c1//8, c1, 1, 1)
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+        self.cv4 = Conv(2 * c_, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+        self.act = nn.SiLU()
+        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
+
+    def forward(self, x):
+        x = x * self.cvsig(self.cs(self.avg_pool(x))).expand_as(x)
+        y1 = self.cv3(self.m(self.cv1(x)))
+        y2 = self.cv2(x)
+        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+
+class BottleneckCSPSEA(nn.Module):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(BottleneckCSPSEA, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.avg_pool = nn.AdaptiveAvgPool2d(1)
+        self.cs = ConvSqu(c1, c1//8, 1, 1)
+        self.cvsig = ConvSig(c1//8, c1, 1, 1)
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+        self.cv4 = Conv(2 * c_, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+        self.act = nn.SiLU()
+        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
+
+    def forward(self, x):
+        x = x + x * self.cvsig(self.cs(self.avg_pool(x))).expand_as(x)
+        y1 = self.cv3(self.m(self.cv1(x)))
+        y2 = self.cv2(x)
+        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+
+class BottleneckCSPSAM(nn.Module):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(BottleneckCSPSAM, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cvsig = ConvSig(c1, c1, 1, 1)
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+        self.cv4 = Conv(2 * c_, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+        self.act = nn.SiLU()
+        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
+
+    def forward(self, x):
+        x = x * self.cvsig(x)
+        y1 = self.cv3(self.m(self.cv1(x)))
+        y2 = self.cv2(x)
+        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+
+class BottleneckCSPSAMA(nn.Module):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(BottleneckCSPSAMA, self).__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cvsig = ConvSig(c1, c1, 1, 1)
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+        self.cv4 = Conv(2 * c_, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+        self.act = nn.SiLU()
+        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
+
+    def forward(self, x):
+        x = x + x * self.cvsig(x)
+        y1 = self.cv3(self.m(self.cv1(x)))
+        y2 = self.cv2(x)
+        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+
+class BottleneckCSPSAMB(nn.Module):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
|
312 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
313 |
+
super(BottleneckCSPSAMB, self).__init__()
|
314 |
+
c_ = int(c2 * e) # hidden channels
|
315 |
+
self.cvsig = ConvSig(c2, c2, 1, 1)
|
316 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
317 |
+
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
|
318 |
+
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
|
319 |
+
self.cv4 = Conv(2 * c_, c2, 1, 1)
|
320 |
+
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
|
321 |
+
self.act = nn.SiLU()
|
322 |
+
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
|
323 |
+
|
324 |
+
def forward(self, x):
|
325 |
+
y1 = self.cv3(self.m(self.cv1(x)))
|
326 |
+
y2 = self.cv2(x)
|
327 |
+
y = self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
|
328 |
+
return y * self.cvsig(y)
|
329 |
+
|
330 |
+
|
331 |
+
class BottleneckCSPGC(nn.Module):
|
332 |
+
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
|
333 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
334 |
+
super(BottleneckCSPGC, self).__init__()
|
335 |
+
c_ = int(c2 * e) # hidden channels
|
336 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
337 |
+
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
|
338 |
+
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
|
339 |
+
self.cv4 = Conv(2 * c_, c2, 1, 1)
|
340 |
+
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
|
341 |
+
self.act = nn.SiLU()
|
342 |
+
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
|
343 |
+
|
344 |
+
self.channel_add_conv = nn.Sequential(
|
345 |
+
nn.Conv2d(c2, c2, kernel_size=1),
|
346 |
+
nn.LayerNorm([c2, 1, 1]),
|
347 |
+
nn.ReLU(inplace=True), # yapf: disable
|
348 |
+
nn.Conv2d(c2, c2, kernel_size=1))
|
349 |
+
|
350 |
+
self.conv_mask = nn.Conv2d(c2, 1, kernel_size=1)
|
351 |
+
self.softmax = nn.Softmax(dim=2)
|
352 |
+
|
353 |
+
def spatial_pool(self, x):
|
354 |
+
|
355 |
+
batch, channel, height, width = x.size()
|
356 |
+
|
357 |
+
input_x = x
|
358 |
+
# [N, C, H * W]
|
359 |
+
input_x = input_x.view(batch, channel, height * width)
|
360 |
+
# [N, 1, C, H * W]
|
361 |
+
input_x = input_x.unsqueeze(1)
|
362 |
+
# [N, 1, H, W]
|
363 |
+
context_mask = self.conv_mask(x)
|
364 |
+
# [N, 1, H * W]
|
365 |
+
context_mask = context_mask.view(batch, 1, height * width)
|
366 |
+
# [N, 1, H * W]
|
367 |
+
context_mask = self.softmax(context_mask)
|
368 |
+
# [N, 1, H * W, 1]
|
369 |
+
context_mask = context_mask.unsqueeze(-1)
|
370 |
+
# [N, 1, C, 1]
|
371 |
+
context = torch.matmul(input_x, context_mask)
|
372 |
+
# [N, C, 1, 1]
|
373 |
+
context = context.view(batch, channel, 1, 1)
|
374 |
+
|
375 |
+
return context
|
376 |
+
|
377 |
+
def forward(self, x):
|
378 |
+
y1 = self.cv3(self.m(self.cv1(x)))
|
379 |
+
y2 = self.cv2(x)
|
380 |
+
y = self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
|
381 |
+
|
382 |
+
return y + self.channel_add_conv(self.spatial_pool(y))
|
383 |
+
|
384 |
+
|
385 |
+
class BottleneckCSPDNL(nn.Module):
|
386 |
+
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
|
387 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
388 |
+
super(BottleneckCSPDNL, self).__init__()
|
389 |
+
c_ = int(c2 * e) # hidden channels
|
390 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
391 |
+
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
|
392 |
+
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
|
393 |
+
self.cv4 = Conv(2 * c_, c2, 1, 1)
|
394 |
+
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
|
395 |
+
self.act = nn.SiLU()
|
396 |
+
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
|
397 |
+
|
398 |
+
|
399 |
+
self.conv_query = nn.Conv2d(c2, c2, kernel_size=1)
|
400 |
+
self.conv_key = nn.Conv2d(c2, c2, kernel_size=1)
|
401 |
+
self.conv_value = nn.Conv2d(c2, c2, kernel_size=1, bias=False)
|
402 |
+
self.conv_out = None
|
403 |
+
self.scale = math.sqrt(c2)
|
404 |
+
self.temperature = 0.05
|
405 |
+
self.softmax = nn.Softmax(dim=2)
|
406 |
+
self.gamma = nn.Parameter(torch.zeros(1))
|
407 |
+
self.conv_mask = nn.Conv2d(c2, 1, kernel_size=1)
|
408 |
+
|
409 |
+
def forward(self, x):
|
410 |
+
y1 = self.cv3(self.m(self.cv1(x)))
|
411 |
+
y2 = self.cv2(x)
|
412 |
+
y = self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
|
413 |
+
|
414 |
+
# [N, C, T, H, W]
|
415 |
+
residual = y
|
416 |
+
# [N, C, T, H', W']
|
417 |
+
input_x = y
|
418 |
+
# [N, C', T, H, W]
|
419 |
+
query = self.conv_query(y)
|
420 |
+
# [N, C', T, H', W']
|
421 |
+
key = self.conv_key(input_x)
|
422 |
+
value = self.conv_value(input_x)
|
423 |
+
# [N, C', H x W]
|
424 |
+
query = query.view(query.size(0), query.size(1), -1)
|
425 |
+
# [N, C', H' x W']
|
426 |
+
key = key.view(key.size(0), key.size(1), -1)
|
427 |
+
value = value.view(value.size(0), value.size(1), -1)
|
428 |
+
# channel whitening
|
429 |
+
key_mean = key.mean(2).unsqueeze(2)
|
430 |
+
query_mean = query.mean(2).unsqueeze(2)
|
431 |
+
key -= key_mean
|
432 |
+
query -= query_mean
|
433 |
+
# [N, T x H x W, T x H' x W']
|
434 |
+
sim_map = torch.bmm(query.transpose(1, 2), key)
|
435 |
+
sim_map = sim_map/self.scale
|
436 |
+
sim_map = sim_map/self.temperature
|
437 |
+
sim_map = self.softmax(sim_map)
|
438 |
+
# [N, T x H x W, C']
|
439 |
+
out_sim = torch.bmm(sim_map, value.transpose(1, 2))
|
440 |
+
# [N, C', T x H x W]
|
441 |
+
out_sim = out_sim.transpose(1, 2)
|
442 |
+
# [N, C', T, H, W]
|
443 |
+
out_sim = out_sim.view(out_sim.size(0), out_sim.size(1), *y.size()[2:]).contiguous()
|
444 |
+
out_sim = self.gamma * out_sim
|
445 |
+
# [N, 1, H', W']
|
446 |
+
mask = self.conv_mask(input_x)
|
447 |
+
# [N, 1, H'x W']
|
448 |
+
mask = mask.view(mask.size(0), mask.size(1), -1)
|
449 |
+
mask = self.softmax(mask)
|
450 |
+
# [N, C, 1, 1]
|
451 |
+
out_gc = torch.bmm(value, mask.permute(0,2,1)).unsqueeze(-1).contiguous()
|
452 |
+
|
453 |
+
return out_sim + out_gc + residual
|
454 |
+
|
455 |
+
|
456 |
+
class BottleneckCSP2(nn.Module):
|
457 |
+
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
|
458 |
+
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
459 |
+
super(BottleneckCSP2, self).__init__()
|
460 |
+
c_ = int(c2) # hidden channels
|
461 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
462 |
+
self.cv2 = nn.Conv2d(c_, c_, 1, 1, bias=False)
|
463 |
+
self.cv3 = Conv(2 * c_, c2, 1, 1)
|
464 |
+
self.bn = nn.BatchNorm2d(2 * c_)
|
465 |
+
self.act = nn.SiLU()
|
466 |
+
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
|
467 |
+
|
468 |
+
def forward(self, x):
|
469 |
+
x1 = self.cv1(x)
|
470 |
+
y1 = self.m(x1)
|
471 |
+
y2 = self.cv2(x1)
|
472 |
+
return self.cv3(self.act(self.bn(torch.cat((y1, y2), dim=1))))
|
473 |
+
|
474 |
+
|
475 |
+
class BottleneckCSP2SAM(nn.Module):
|
476 |
+
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
|
477 |
+
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
478 |
+
super(BottleneckCSP2SAM, self).__init__()
|
479 |
+
c_ = int(c2) # hidden channels
|
480 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
481 |
+
self.cvsig = ConvSig(c_, c_, 1, 1)
|
482 |
+
self.cv2 = nn.Conv2d(c_, c_, 1, 1, bias=False)
|
483 |
+
self.cv3 = Conv(2 * c_, c2, 1, 1)
|
484 |
+
self.bn = nn.BatchNorm2d(2 * c_)
|
485 |
+
self.act = nn.SiLU()
|
486 |
+
self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
|
487 |
+
|
488 |
+
def forward(self, x):
|
489 |
+
x1 = self.cv1(x)
|
490 |
+
x1 = x1 * self.cvsig(x1).contiguous()
|
491 |
+
y1 = self.m(x1)
|
492 |
+
y2 = self.cv2(x1)
|
493 |
+
return self.cv3(self.act(self.bn(torch.cat((y1, y2), dim=1))))
|
494 |
+
|
495 |
+
|
496 |
+
class VoVCSP(nn.Module):
|
497 |
+
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
|
498 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
499 |
+
super(VoVCSP, self).__init__()
|
500 |
+
c_ = int(c2) # hidden channels
|
501 |
+
self.cv1 = Conv(c1//2, c_//2, 3, 1)
|
502 |
+
self.cv2 = Conv(c_//2, c_//2, 3, 1)
|
503 |
+
self.cv3 = Conv(c_, c2, 1, 1)
|
504 |
+
|
505 |
+
def forward(self, x):
|
506 |
+
_, x1 = x.chunk(2, dim=1)
|
507 |
+
x1 = self.cv1(x1)
|
508 |
+
x2 = self.cv2(x1)
|
509 |
+
return self.cv3(torch.cat((x1,x2), dim=1))
|
510 |
+
|
511 |
+
|
512 |
+
class SPP(nn.Module):
|
513 |
+
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
514 |
+
def __init__(self, c1, c2, k=(5, 9, 13)):
|
515 |
+
super(SPP, self).__init__()
|
516 |
+
c_ = c1 // 2 # hidden channels
|
517 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
518 |
+
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
|
519 |
+
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
|
520 |
+
|
521 |
+
def forward(self, x):
|
522 |
+
x = self.cv1(x)
|
523 |
+
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
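
# Illustrative sketch (not part of the upstream code): the three max-pools are stride-1
# with 'same' padding, so SPP only changes the channel count, here 256 -> 512:
# >>> m = SPP(256, 512)
# >>> tuple(m(torch.zeros(1, 256, 20, 20)).shape)
# (1, 512, 20, 20)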
|
524 |
+
|
525 |
+
|
526 |
+
class SPPCSP(nn.Module):
|
527 |
+
# CSP SPP https://github.com/WongKinYiu/CrossStagePartialNetworks
|
528 |
+
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
|
529 |
+
super(SPPCSP, self).__init__()
|
530 |
+
c_ = int(2 * c2 * e) # hidden channels
|
531 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
532 |
+
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
|
533 |
+
self.cv3 = Conv(c_, c_, 3, 1)
|
534 |
+
self.cv4 = Conv(c_, c_, 1, 1)
|
535 |
+
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
|
536 |
+
self.cv5 = Conv(4 * c_, c_, 1, 1)
|
537 |
+
self.cv6 = Conv(c_, c_, 3, 1)
|
538 |
+
self.bn = nn.BatchNorm2d(2 * c_)
|
539 |
+
self.act = nn.SiLU()
|
540 |
+
self.cv7 = Conv(2 * c_, c2, 1, 1)
|
541 |
+
|
542 |
+
def forward(self, x):
|
543 |
+
x1 = self.cv4(self.cv3(self.cv1(x)))
|
544 |
+
y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1)))
|
545 |
+
y2 = self.cv2(x)
|
546 |
+
return self.cv7(self.act(self.bn(torch.cat((y1, y2), dim=1))))
|
547 |
+
|
548 |
+
|
549 |
+
class Focus(nn.Module):
|
550 |
+
# Focus wh information into c-space
|
551 |
+
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
|
552 |
+
super(Focus, self).__init__()
|
553 |
+
self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
|
554 |
+
|
555 |
+
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
|
556 |
+
return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
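
# Illustrative sketch (not part of the upstream code): the four slices trade spatial
# resolution for channels (space-to-depth) before the conv, e.g.
# >>> m = Focus(3, 32, k=3)
# >>> tuple(m(torch.zeros(1, 3, 64, 64)).shape)
# (1, 32, 32, 32)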
|
557 |
+
|
558 |
+
|
559 |
+
class MP(nn.Module):
|
560 |
+
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
561 |
+
def __init__(self, k=2):
|
562 |
+
super(MP, self).__init__()
|
563 |
+
self.m = nn.MaxPool2d(kernel_size=k, stride=k)
|
564 |
+
|
565 |
+
def forward(self, x):
|
566 |
+
return self.m(x)
|
567 |
+
|
568 |
+
|
569 |
+
class DownD(nn.Module):
|
570 |
+
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
571 |
+
def __init__(self, c1, c2, n=1, k=2):
|
572 |
+
super(DownD, self).__init__()
|
573 |
+
c_ = int(c1) # hidden channels
|
574 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
575 |
+
self.cv2 = Conv(c_, c_, 3, k)
|
576 |
+
self.cv3 = Conv(c_, c2, 1, 1)
|
577 |
+
self.cv4 = Conv(c1, c2, 1, 1)
|
578 |
+
self.ap = nn.AvgPool2d(kernel_size=k, stride=k)
|
579 |
+
|
580 |
+
def forward(self, x):
|
581 |
+
return self.cv3(self.cv2(self.cv1(x))) + self.cv4(self.ap(x))
|
582 |
+
|
583 |
+
|
584 |
+
class DownC(nn.Module):
|
585 |
+
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
586 |
+
def __init__(self, c1, c2, n=1, k=2):
|
587 |
+
super(DownC, self).__init__()
|
588 |
+
c_ = int(c1) # hidden channels
|
589 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
590 |
+
self.cv2 = Conv(c_, c2//2, 3, k)
|
591 |
+
self.cv3 = Conv(c1, c2//2, 1, 1)
|
592 |
+
self.mp = nn.MaxPool2d(kernel_size=k, stride=k)
|
593 |
+
|
594 |
+
def forward(self, x):
|
595 |
+
return torch.cat((self.cv2(self.cv1(x)), self.cv3(self.mp(x))), dim=1)
|
596 |
+
|
597 |
+
|
598 |
+
class DNL(nn.Module):
|
599 |
+
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
600 |
+
def __init__(self, c1, c2, k=3, s=1):
|
601 |
+
super(DNL, self).__init__()
|
602 |
+
c_ = int(c1) # hidden channels
|
603 |
+
|
604 |
+
#
|
605 |
+
self.conv_query = nn.Conv2d(c1, c_, kernel_size=1)
|
606 |
+
self.conv_key = nn.Conv2d(c1, c_, kernel_size=1)
|
607 |
+
|
608 |
+
self.conv_value = nn.Conv2d(c1, c1, kernel_size=1, bias=False)
|
609 |
+
self.conv_out = None
|
610 |
+
|
611 |
+
self.scale = math.sqrt(c_)
|
612 |
+
self.temperature = 0.05
|
613 |
+
|
614 |
+
self.softmax = nn.Softmax(dim=2)
|
615 |
+
|
616 |
+
self.gamma = nn.Parameter(torch.zeros(1))
|
617 |
+
|
618 |
+
self.conv_mask = nn.Conv2d(c1, 1, kernel_size=1)
|
619 |
+
|
620 |
+
self.cv = Conv(c1, c2, k, s)
|
621 |
+
|
622 |
+
def forward(self, x):
|
623 |
+
|
624 |
+
# [N, C, T, H, W]
|
625 |
+
residual = x
|
626 |
+
|
627 |
+
# [N, C, T, H', W']
|
628 |
+
input_x = x
|
629 |
+
|
630 |
+
# [N, C', T, H, W]
|
631 |
+
query = self.conv_query(x)
|
632 |
+
|
633 |
+
# [N, C', T, H', W']
|
634 |
+
key = self.conv_key(input_x)
|
635 |
+
value = self.conv_value(input_x)
|
636 |
+
|
637 |
+
# [N, C', H x W]
|
638 |
+
query = query.view(query.size(0), query.size(1), -1)
|
639 |
+
|
640 |
+
# [N, C', H' x W']
|
641 |
+
key = key.view(key.size(0), key.size(1), -1)
|
642 |
+
value = value.view(value.size(0), value.size(1), -1)
|
643 |
+
|
644 |
+
# channel whitening
|
645 |
+
key_mean = key.mean(2).unsqueeze(2)
|
646 |
+
query_mean = query.mean(2).unsqueeze(2)
|
647 |
+
key -= key_mean
|
648 |
+
query -= query_mean
|
649 |
+
|
650 |
+
# [N, T x H x W, T x H' x W']
|
651 |
+
sim_map = torch.bmm(query.transpose(1, 2), key)
|
652 |
+
sim_map = sim_map/self.scale
|
653 |
+
sim_map = sim_map/self.temperature
|
654 |
+
sim_map = self.softmax(sim_map)
|
655 |
+
|
656 |
+
# [N, T x H x W, C']
|
657 |
+
out_sim = torch.bmm(sim_map, value.transpose(1, 2))
|
658 |
+
|
659 |
+
# [N, C', T x H x W]
|
660 |
+
out_sim = out_sim.transpose(1, 2)
|
661 |
+
|
662 |
+
# [N, C', T, H, W]
|
663 |
+
out_sim = out_sim.view(out_sim.size(0), out_sim.size(1), *x.size()[2:])
|
664 |
+
out_sim = self.gamma * out_sim
|
665 |
+
|
666 |
+
# [N, 1, H', W']
|
667 |
+
mask = self.conv_mask(input_x)
|
668 |
+
# [N, 1, H'x W']
|
669 |
+
mask = mask.view(mask.size(0), mask.size(1), -1)
|
670 |
+
mask = self.softmax(mask)
|
671 |
+
# [N, C, 1, 1]
|
672 |
+
out_gc = torch.bmm(value, mask.permute(0,2,1)).unsqueeze(-1)
|
673 |
+
out_sim = out_sim+out_gc
|
674 |
+
|
675 |
+
return self.cv(out_sim + residual)
|
676 |
+
|
677 |
+
|
678 |
+
class GC(nn.Module):
|
679 |
+
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
680 |
+
def __init__(self, c1, c2, k=3, s=1):
|
681 |
+
super(GC, self).__init__()
|
682 |
+
c_ = int(c1) # hidden channels
|
683 |
+
|
684 |
+
#
|
685 |
+
self.channel_add_conv = nn.Sequential(
|
686 |
+
nn.Conv2d(c1, c_, kernel_size=1),
|
687 |
+
nn.LayerNorm([c_, 1, 1]),
|
688 |
+
nn.ReLU(inplace=True), # yapf: disable
|
689 |
+
nn.Conv2d(c_, c1, kernel_size=1))
|
690 |
+
|
691 |
+
self.conv_mask = nn.Conv2d(c_, 1, kernel_size=1)
|
692 |
+
self.softmax = nn.Softmax(dim=2)
|
693 |
+
|
694 |
+
self.cv = Conv(c1, c2, k, s)
|
695 |
+
|
696 |
+
|
697 |
+
def spatial_pool(self, x):
|
698 |
+
|
699 |
+
batch, channel, height, width = x.size()
|
700 |
+
|
701 |
+
input_x = x
|
702 |
+
# [N, C, H * W]
|
703 |
+
input_x = input_x.view(batch, channel, height * width)
|
704 |
+
# [N, 1, C, H * W]
|
705 |
+
input_x = input_x.unsqueeze(1)
|
706 |
+
# [N, 1, H, W]
|
707 |
+
context_mask = self.conv_mask(x)
|
708 |
+
# [N, 1, H * W]
|
709 |
+
context_mask = context_mask.view(batch, 1, height * width)
|
710 |
+
# [N, 1, H * W]
|
711 |
+
context_mask = self.softmax(context_mask)
|
712 |
+
# [N, 1, H * W, 1]
|
713 |
+
context_mask = context_mask.unsqueeze(-1)
|
714 |
+
# [N, 1, C, 1]
|
715 |
+
context = torch.matmul(input_x, context_mask)
|
716 |
+
# [N, C, 1, 1]
|
717 |
+
context = context.view(batch, channel, 1, 1)
|
718 |
+
|
719 |
+
return context
|
720 |
+
|
721 |
+
def forward(self, x):
|
722 |
+
|
723 |
+
return self.cv(x + self.channel_add_conv(self.spatial_pool(x)))
|
724 |
+
|
725 |
+
|
726 |
+
class SAM(nn.Module):
|
727 |
+
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
728 |
+
def __init__(self, c1, c2, k=3, s=1):
|
729 |
+
super(SAM, self).__init__()
|
730 |
+
c_ = int(c1) # hidden channels
|
731 |
+
self.cvsig = ConvSig(c1, c1, 1, 1)
|
732 |
+
self.cv = Conv(c1, c2, k, s)
|
733 |
+
|
734 |
+
def forward(self, x):
|
735 |
+
|
736 |
+
return self.cv(x * self.cvsig(x))
|
737 |
+
|
738 |
+
|
739 |
+
class SAMA(nn.Module):
|
740 |
+
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
741 |
+
def __init__(self, c1, c2, k=3, s=1):
|
742 |
+
super(SAMA, self).__init__()
|
743 |
+
c_ = int(c1) # hidden channels
|
744 |
+
self.cvsig = ConvSig(c1, c1, 1, 1)
|
745 |
+
self.cv = Conv(c1, c2, k, s)
|
746 |
+
|
747 |
+
def forward(self, x):
|
748 |
+
|
749 |
+
return self.cv(x + x * self.cvsig(x))
|
750 |
+
|
751 |
+
|
752 |
+
class SAMB(nn.Module):
|
753 |
+
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
754 |
+
def __init__(self, c1, c2, k=3, s=1):
|
755 |
+
super(SAMB, self).__init__()
|
756 |
+
c_ = int(c1) # hidden channels
|
757 |
+
self.cv = Conv(c1, c2, k, s)
|
758 |
+
self.cvsig = ConvSig(c2, c2, 1, 1)
|
759 |
+
|
760 |
+
def forward(self, x):
|
761 |
+
|
762 |
+
x = self.cv(x)
|
763 |
+
|
764 |
+
return x * self.cvsig(x)
|
765 |
+
|
766 |
+
|
767 |
+
class Concat(nn.Module):
|
768 |
+
# Concatenate a list of tensors along dimension
|
769 |
+
def __init__(self, dimension=1):
|
770 |
+
super(Concat, self).__init__()
|
771 |
+
self.d = dimension
|
772 |
+
|
773 |
+
def forward(self, x):
|
774 |
+
return torch.cat(x, self.d)
|
775 |
+
|
776 |
+
|
777 |
+
class NMS(nn.Module):
|
778 |
+
# Non-Maximum Suppression (NMS) module
|
779 |
+
conf = 0.25 # confidence threshold
|
780 |
+
iou = 0.45 # IoU threshold
|
781 |
+
classes = None # (optional list) filter by class
|
782 |
+
|
783 |
+
def __init__(self):
|
784 |
+
super(NMS, self).__init__()
|
785 |
+
|
786 |
+
def forward(self, x):
|
787 |
+
return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
|
788 |
+
|
789 |
+
|
790 |
+
class autoShape(nn.Module):
|
791 |
+
# input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
|
792 |
+
img_size = 640 # inference size (pixels)
|
793 |
+
conf = 0.25 # NMS confidence threshold
|
794 |
+
iou = 0.45 # NMS IoU threshold
|
795 |
+
classes = None # (optional list) filter by class
|
796 |
+
|
797 |
+
def __init__(self, model):
|
798 |
+
super(autoShape, self).__init__()
|
799 |
+
self.model = model.eval()
|
800 |
+
|
801 |
+
def forward(self, imgs, size=640, augment=False, profile=False):
|
802 |
+
# supports inference from various sources. For height=720, width=1280, RGB images example inputs are:
|
803 |
+
# opencv: imgs = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(720,1280,3)
|
804 |
+
# PIL: imgs = Image.open('image.jpg') # HWC x(720,1280,3)
|
805 |
+
# numpy: imgs = np.zeros((720,1280,3)) # HWC
|
806 |
+
# torch: imgs = torch.zeros(16,3,720,1280) # BCHW
|
807 |
+
# multiple: imgs = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
|
808 |
+
|
809 |
+
p = next(self.model.parameters()) # for device and type
|
810 |
+
if isinstance(imgs, torch.Tensor): # torch
|
811 |
+
return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference
|
812 |
+
|
813 |
+
# Pre-process
|
814 |
+
if not isinstance(imgs, list):
|
815 |
+
imgs = [imgs]
|
816 |
+
shape0, shape1 = [], [] # image and inference shapes
|
817 |
+
batch = range(len(imgs)) # batch size
|
818 |
+
for i in batch:
|
819 |
+
imgs[i] = np.array(imgs[i]) # to numpy
|
820 |
+
imgs[i] = imgs[i][:, :, :3] if imgs[i].ndim == 3 else np.tile(imgs[i][:, :, None], 3) # enforce 3ch input
|
821 |
+
s = imgs[i].shape[:2] # HWC
|
822 |
+
shape0.append(s) # image shape
|
823 |
+
g = (size / max(s)) # gain
|
824 |
+
shape1.append([y * g for y in s])
|
825 |
+
shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape
|
826 |
+
x = [letterbox(imgs[i], new_shape=shape1, auto=False)[0] for i in batch] # pad
|
827 |
+
x = np.stack(x, 0) if batch[-1] else x[0][None] # stack
|
828 |
+
x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW
|
829 |
+
x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32
|
830 |
+
|
831 |
+
# Inference
|
832 |
+
with torch.no_grad():
|
833 |
+
y = self.model(x, augment, profile)[0] # forward
|
834 |
+
y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS
|
835 |
+
|
836 |
+
# Post-process
|
837 |
+
for i in batch:
|
838 |
+
if y[i] is not None:
|
839 |
+
y[i][:, :4] = scale_coords(shape1, y[i][:, :4], shape0[i])
|
840 |
+
|
841 |
+
return Detections(imgs, y, self.names)
|
842 |
+
|
843 |
+
|
844 |
+
class Detections:
|
845 |
+
# detections class for YOLOv5 inference results
|
846 |
+
def __init__(self, imgs, pred, names=None):
|
847 |
+
super(Detections, self).__init__()
|
848 |
+
self.imgs = imgs # list of images as numpy arrays
|
849 |
+
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
|
850 |
+
self.names = names # class names
|
851 |
+
self.xyxy = pred # xyxy pixels
|
852 |
+
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
|
853 |
+
gn = [torch.Tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.]) for im in imgs] # normalization gains
|
854 |
+
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
|
855 |
+
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
|
856 |
+
|
857 |
+
def display(self, pprint=False, show=False, save=False):
|
858 |
+
colors = color_list()
|
859 |
+
for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
|
860 |
+
str = f'Image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
|
861 |
+
if pred is not None:
|
862 |
+
for c in pred[:, -1].unique():
|
863 |
+
n = (pred[:, -1] == c).sum() # detections per class
|
864 |
+
str += f'{n} {self.names[int(c)]}s, ' # add to string
|
865 |
+
if show or save:
|
866 |
+
img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np
|
867 |
+
for *box, conf, cls in pred: # xyxy, confidence, class
|
868 |
+
# str += '%s %.2f, ' % (names[int(cls)], conf) # label
|
869 |
+
ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10]) # plot
|
870 |
+
if save:
|
871 |
+
f = f'results{i}.jpg'
|
872 |
+
str += f"saved to '{f}'"
|
873 |
+
img.save(f) # save
|
874 |
+
if show:
|
875 |
+
img.show(f'Image {i}') # show
|
876 |
+
if pprint:
|
877 |
+
print(str)
|
878 |
+
|
879 |
+
def print(self):
|
880 |
+
self.display(pprint=True) # print results
|
881 |
+
|
882 |
+
def show(self):
|
883 |
+
self.display(show=True) # show results
|
884 |
+
|
885 |
+
def save(self):
|
886 |
+
self.display(save=True) # save results
|
887 |
+
|
888 |
+
|
889 |
+
class Flatten(nn.Module):
|
890 |
+
# Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
|
891 |
+
@staticmethod
|
892 |
+
def forward(x):
|
893 |
+
return x.view(x.size(0), -1)
|
894 |
+
|
895 |
+
|
896 |
+
class Classify(nn.Module):
|
897 |
+
# Classification head, i.e. x(b,c1,20,20) to x(b,c2)
|
898 |
+
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
|
899 |
+
super(Classify, self).__init__()
|
900 |
+
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
|
901 |
+
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) # to x(b,c2,1,1)
|
902 |
+
self.flat = Flatten()
|
903 |
+
|
904 |
+
def forward(self, x):
|
905 |
+
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
|
906 |
+
return self.flat(self.conv(z)) # flatten to x(b,c2)
|
907 |
+
|
908 |
+
|
909 |
+
class TransformerLayer(nn.Module):
|
910 |
+
def __init__(self, c, num_heads):
|
911 |
+
super().__init__()
|
912 |
+
|
913 |
+
self.ln1 = nn.LayerNorm(c)
|
914 |
+
self.q = nn.Linear(c, c, bias=False)
|
915 |
+
self.k = nn.Linear(c, c, bias=False)
|
916 |
+
self.v = nn.Linear(c, c, bias=False)
|
917 |
+
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
|
918 |
+
self.ln2 = nn.LayerNorm(c)
|
919 |
+
self.fc1 = nn.Linear(c, c, bias=False)
|
920 |
+
self.fc2 = nn.Linear(c, c, bias=False)
|
921 |
+
|
922 |
+
def forward(self, x):
|
923 |
+
x_ = self.ln1(x)
|
924 |
+
x = self.ma(self.q(x_), self.k(x_), self.v(x_))[0] + x
|
925 |
+
x = self.ln2(x)
|
926 |
+
x = self.fc2(self.fc1(x)) + x
|
927 |
+
return x
|
928 |
+
|
929 |
+
|
930 |
+
class TransformerBlock(nn.Module):
|
931 |
+
def __init__(self, c1, c2, num_heads, num_layers):
|
932 |
+
super().__init__()
|
933 |
+
|
934 |
+
self.conv = None
|
935 |
+
if c1 != c2:
|
936 |
+
self.conv = Conv(c1, c2)
|
937 |
+
self.linear = nn.Linear(c2, c2)
|
938 |
+
self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
|
939 |
+
self.c2 = c2
|
940 |
+
|
941 |
+
def forward(self, x):
|
942 |
+
if self.conv is not None:
|
943 |
+
x = self.conv(x)
|
944 |
+
b, _, w, h = x.shape
|
945 |
+
p = x.flatten(2)
|
946 |
+
p = p.unsqueeze(0)
|
947 |
+
p = p.transpose(0, 3)
|
948 |
+
p = p.squeeze(3)
|
949 |
+
e = self.linear(p)
|
950 |
+
x = p + e
|
951 |
+
|
952 |
+
x = self.tr(x)
|
953 |
+
x = x.unsqueeze(3)
|
954 |
+
x = x.transpose(0, 3)
|
955 |
+
x = x.reshape(b, self.c2, w, h)
|
956 |
+
return x
|
957 |
+
|
958 |
+
|
959 |
+
|
960 |
+
class BottleneckCSPTR(nn.Module):
|
961 |
+
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
|
962 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
963 |
+
super(BottleneckCSPTR, self).__init__()
|
964 |
+
c_ = int(c2 * e) # hidden channels
|
965 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
966 |
+
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
|
967 |
+
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
|
968 |
+
self.cv4 = Conv(2 * c_, c2, 1, 1)
|
969 |
+
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
|
970 |
+
self.act = nn.SiLU()
|
971 |
+
self.m = TransformerBlock(c_, c_, 4, n)
|
972 |
+
|
973 |
+
def forward(self, x):
|
974 |
+
y1 = self.cv3(self.m(self.cv1(x)))
|
975 |
+
y2 = self.cv2(x)
|
976 |
+
return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
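
# Illustrative sketch (not part of the upstream code): the only change vs. BottleneckCSP
# is that the bottleneck stack is replaced by a TransformerBlock, so shapes are unchanged:
# >>> m = BottleneckCSPTR(64, 64, n=1)
# >>> tuple(m(torch.zeros(1, 64, 32, 32)).shape)
# (1, 64, 32, 32)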
|
977 |
+
|
978 |
+
class BottleneckCSP2TR(nn.Module):
|
979 |
+
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
|
980 |
+
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
981 |
+
super(BottleneckCSP2TR, self).__init__()
|
982 |
+
c_ = int(c2) # hidden channels
|
983 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
984 |
+
self.cv2 = nn.Conv2d(c_, c_, 1, 1, bias=False)
|
985 |
+
self.cv3 = Conv(2 * c_, c2, 1, 1)
|
986 |
+
self.bn = nn.BatchNorm2d(2 * c_)
|
987 |
+
self.act = nn.SiLU()
|
988 |
+
self.m = TransformerBlock(c_, c_, 4, n)
|
989 |
+
|
990 |
+
def forward(self, x):
|
991 |
+
x1 = self.cv1(x)
|
992 |
+
y1 = self.m(x1)
|
993 |
+
y2 = self.cv2(x1)
|
994 |
+
return self.cv3(self.act(self.bn(torch.cat((y1, y2), dim=1))))
|
995 |
+
|
996 |
+
|
997 |
+
class SPPCSPTR(nn.Module):
|
998 |
+
# CSP SPP https://github.com/WongKinYiu/CrossStagePartialNetworks
|
999 |
+
def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
|
1000 |
+
super(SPPCSPTR, self).__init__()
|
1001 |
+
c_ = int(2 * c2 * e) # hidden channels
|
1002 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
1003 |
+
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
|
1004 |
+
self.cv3 = Conv(c_, c_, 3, 1)
|
1005 |
+
self.cv4 = Conv(c_, c_, 1, 1)
|
1006 |
+
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
|
1007 |
+
self.cv5 = Conv(4 * c_, c_, 1, 1)
|
1008 |
+
self.cv6 = TransformerBlock(c_, c_, 4, 1)
|
1009 |
+
self.bn = nn.BatchNorm2d(2 * c_)
|
1010 |
+
self.act = nn.SiLU()
|
1011 |
+
self.cv7 = Conv(2 * c_, c2, 1, 1)
|
1012 |
+
|
1013 |
+
def forward(self, x):
|
1014 |
+
x1 = self.cv4(self.cv3(self.cv1(x)))
|
1015 |
+
y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1)))
|
1016 |
+
y2 = self.cv2(x)
|
1017 |
+
return self.cv7(self.act(self.bn(torch.cat((y1, y2), dim=1))))
|
1018 |
+
|
1019 |
+
class TR(BottleneckCSPTR):
|
1020 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
1021 |
+
super().__init__(c1, c2, n, shortcut, g, e)
|
1022 |
+
c_ = int(c2 * e)
|
1023 |
+
self.m = TransformerBlock(c_, c_, 4, n)
|
asone/detectors/yolor/models/export.py
ADDED
@@ -0,0 +1,68 @@
import argparse

import torch

from asone.detectors.yolor.utils.google_utils import attempt_download

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov4.pt', help='weights path')
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)

    # Input
    img = torch.zeros((opt.batch_size, 3, *opt.img_size))  # image size(1,3,320,192) iDetection

    # Load PyTorch model
    attempt_download(opt.weights)
    model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
    model.eval()
    model.model[-1].export = True  # set Detect() layer export=True
    y = model(img)  # dry run

    # TorchScript export
    try:
        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img)
        ts.save(f)
        print('TorchScript export success, saved as %s' % f)
    except Exception as e:
        print('TorchScript export failure: %s' % e)

    # ONNX export
    try:
        import onnx

        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
        f = opt.weights.replace('.pt', '.onnx')  # filename
        model.fuse()  # only for ONNX
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=['classes', 'boxes'] if y is None else ['output'])

        # Checks
        onnx_model = onnx.load(f)  # load onnx model
        onnx.checker.check_model(onnx_model)  # check onnx model
        print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
        print('ONNX export success, saved as %s' % f)
    except Exception as e:
        print('ONNX export failure: %s' % e)

    # CoreML export
    try:
        import coremltools as ct

        print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
        # convert model from torchscript and apply pixel scaling as per detect.py
        model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        model.save(f)
        print('CoreML export success, saved as %s' % f)
    except Exception as e:
        print('CoreML export failure: %s' % e)

    # Finish
    print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
asone/detectors/yolor/models/models.py
ADDED
@@ -0,0 +1,761 @@
from asone.detectors.yolor.utils.google_utils import *
from asone.detectors.yolor.utils.layers import *
from asone.detectors.yolor.utils.parse_config import *
from asone.detectors.yolor.utils import torch_utils

ONNX_EXPORT = False

def create_modules(module_defs, img_size, cfg):
|
10 |
+
# Constructs module list of layer blocks from module configuration in module_defs
|
11 |
+
|
12 |
+
img_size = [img_size] * 2 if isinstance(img_size, int) else img_size # expand if necessary
|
13 |
+
_ = module_defs.pop(0) # cfg training hyperparams (unused)
|
14 |
+
output_filters = [3] # input channels
|
15 |
+
module_list = nn.ModuleList()
|
16 |
+
routs = [] # list of layers which rout to deeper layers
|
17 |
+
yolo_index = -1
|
18 |
+
|
19 |
+
for i, mdef in enumerate(module_defs):
|
20 |
+
modules = nn.Sequential()
|
21 |
+
|
22 |
+
if mdef['type'] == 'convolutional':
|
23 |
+
bn = mdef['batch_normalize']
|
24 |
+
filters = mdef['filters']
|
25 |
+
k = mdef['size'] # kernel size
|
26 |
+
stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x'])
|
27 |
+
if isinstance(k, int): # single-size conv
|
28 |
+
modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1],
|
29 |
+
out_channels=filters,
|
30 |
+
kernel_size=k,
|
31 |
+
stride=stride,
|
32 |
+
padding=k // 2 if mdef['pad'] else 0,
|
33 |
+
groups=mdef['groups'] if 'groups' in mdef else 1,
|
34 |
+
bias=not bn))
|
35 |
+
else: # multiple-size conv
|
36 |
+
modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1],
|
37 |
+
out_ch=filters,
|
38 |
+
k=k,
|
39 |
+
stride=stride,
|
40 |
+
bias=not bn))
|
41 |
+
|
42 |
+
if bn:
|
43 |
+
modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4))
|
44 |
+
else:
|
45 |
+
routs.append(i) # detection output (goes into yolo layer)
|
46 |
+
|
47 |
+
if mdef['activation'] == 'leaky': # activation study https://github.com/ultralytics/yolov3/issues/441
|
48 |
+
modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True))
|
49 |
+
elif mdef['activation'] == 'swish':
|
50 |
+
modules.add_module('activation', Swish())
|
51 |
+
elif mdef['activation'] == 'mish':
|
52 |
+
modules.add_module('activation', Mish())
|
53 |
+
elif mdef['activation'] == 'emb':
|
54 |
+
modules.add_module('activation', F.normalize())
|
55 |
+
elif mdef['activation'] == 'logistic':
|
56 |
+
modules.add_module('activation', nn.Sigmoid())
|
57 |
+
elif mdef['activation'] == 'silu':
|
58 |
+
modules.add_module('activation', nn.SiLU())
|
59 |
+
|
60 |
+
elif mdef['type'] == 'deformableconvolutional':
|
61 |
+
bn = mdef['batch_normalize']
|
62 |
+
filters = mdef['filters']
|
63 |
+
k = mdef['size'] # kernel size
|
64 |
+
stride = mdef['stride'] if 'stride' in mdef else (mdef['stride_y'], mdef['stride_x'])
|
65 |
+
if isinstance(k, int): # single-size conv
|
66 |
+
modules.add_module('DeformConv2d', DeformConv2d(output_filters[-1],
|
67 |
+
filters,
|
68 |
+
kernel_size=k,
|
69 |
+
padding=k // 2 if mdef['pad'] else 0,
|
70 |
+
stride=stride,
|
71 |
+
bias=not bn,
|
72 |
+
modulation=True))
|
73 |
+
else: # multiple-size conv
|
74 |
+
modules.add_module('MixConv2d', MixConv2d(in_ch=output_filters[-1],
|
75 |
+
out_ch=filters,
|
76 |
+
k=k,
|
77 |
+
stride=stride,
|
78 |
+
bias=not bn))
|
79 |
+
|
80 |
+
if bn:
|
81 |
+
modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4))
|
82 |
+
else:
|
83 |
+
routs.append(i) # detection output (goes into yolo layer)
|
84 |
+
|
85 |
+
if mdef['activation'] == 'leaky': # activation study https://github.com/ultralytics/yolov3/issues/441
|
86 |
+
modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True))
|
87 |
+
elif mdef['activation'] == 'swish':
|
88 |
+
modules.add_module('activation', Swish())
|
89 |
+
elif mdef['activation'] == 'mish':
|
90 |
+
modules.add_module('activation', Mish())
|
91 |
+
elif mdef['activation'] == 'silu':
|
92 |
+
modules.add_module('activation', nn.SiLU())
|
93 |
+
|
94 |
+
elif mdef['type'] == 'dropout':
|
95 |
+
p = mdef['probability']
|
96 |
+
modules = nn.Dropout(p)
|
97 |
+
|
98 |
+
elif mdef['type'] == 'avgpool':
|
99 |
+
modules = GAP()
|
100 |
+
|
101 |
+
elif mdef['type'] == 'silence':
|
102 |
+
filters = output_filters[-1]
|
103 |
+
modules = Silence()
|
104 |
+
|
105 |
+
elif mdef['type'] == 'scale_channels': # nn.Sequential() placeholder for 'shortcut' layer
|
106 |
+
layers = mdef['from']
|
107 |
+
filters = output_filters[-1]
|
108 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
109 |
+
modules = ScaleChannel(layers=layers)
|
110 |
+
|
111 |
+
elif mdef['type'] == 'shift_channels': # nn.Sequential() placeholder for 'shortcut' layer
|
112 |
+
layers = mdef['from']
|
113 |
+
filters = output_filters[-1]
|
114 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
115 |
+
modules = ShiftChannel(layers=layers)
|
116 |
+
|
117 |
+
elif mdef['type'] == 'shift_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer
|
118 |
+
layers = mdef['from']
|
119 |
+
filters = output_filters[-1]
|
120 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
121 |
+
modules = ShiftChannel2D(layers=layers)
|
122 |
+
|
123 |
+
elif mdef['type'] == 'control_channels': # nn.Sequential() placeholder for 'shortcut' layer
|
124 |
+
layers = mdef['from']
|
125 |
+
filters = output_filters[-1]
|
126 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
127 |
+
modules = ControlChannel(layers=layers)
|
128 |
+
|
129 |
+
elif mdef['type'] == 'control_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer
|
130 |
+
layers = mdef['from']
|
131 |
+
filters = output_filters[-1]
|
132 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
133 |
+
modules = ControlChannel2D(layers=layers)
|
134 |
+
|
135 |
+
elif mdef['type'] == 'alternate_channels': # nn.Sequential() placeholder for 'shortcut' layer
|
136 |
+
layers = mdef['from']
|
137 |
+
filters = output_filters[-1] * 2
|
138 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
139 |
+
modules = AlternateChannel(layers=layers)
|
140 |
+
|
141 |
+
elif mdef['type'] == 'alternate_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer
|
142 |
+
layers = mdef['from']
|
143 |
+
filters = output_filters[-1] * 2
|
144 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
145 |
+
modules = AlternateChannel2D(layers=layers)
|
146 |
+
|
147 |
+
elif mdef['type'] == 'select_channels': # nn.Sequential() placeholder for 'shortcut' layer
|
148 |
+
layers = mdef['from']
|
149 |
+
filters = output_filters[-1]
|
150 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
151 |
+
modules = SelectChannel(layers=layers)
|
152 |
+
|
153 |
+
elif mdef['type'] == 'select_channels_2d': # nn.Sequential() placeholder for 'shortcut' layer
|
154 |
+
layers = mdef['from']
|
155 |
+
filters = output_filters[-1]
|
156 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
157 |
+
modules = SelectChannel2D(layers=layers)
|
158 |
+
|
159 |
+
elif mdef['type'] == 'sam': # nn.Sequential() placeholder for 'shortcut' layer
|
160 |
+
layers = mdef['from']
|
161 |
+
filters = output_filters[-1]
|
162 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
163 |
+
modules = ScaleSpatial(layers=layers)
|
164 |
+
|
165 |
+
elif mdef['type'] == 'BatchNorm2d':
|
166 |
+
filters = output_filters[-1]
|
167 |
+
modules = nn.BatchNorm2d(filters, momentum=0.03, eps=1E-4)
|
168 |
+
if i == 0 and filters == 3: # normalize RGB image
|
169 |
+
# imagenet mean and var https://pytorch.org/docs/stable/torchvision/models.html#classification
|
170 |
+
modules.running_mean = torch.tensor([0.485, 0.456, 0.406])
|
171 |
+
modules.running_var = torch.tensor([0.0524, 0.0502, 0.0506])
|
172 |
+
|
173 |
+
elif mdef['type'] == 'maxpool':
|
174 |
+
k = mdef['size'] # kernel size
|
175 |
+
stride = mdef['stride']
|
176 |
+
maxpool = nn.MaxPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2)
|
177 |
+
if k == 2 and stride == 1: # yolov3-tiny
|
178 |
+
modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1)))
|
179 |
+
modules.add_module('MaxPool2d', maxpool)
|
180 |
+
else:
|
181 |
+
modules = maxpool
|
182 |
+
|
183 |
+
elif mdef['type'] == 'local_avgpool':
|
184 |
+
k = mdef['size'] # kernel size
|
185 |
+
stride = mdef['stride']
|
186 |
+
avgpool = nn.AvgPool2d(kernel_size=k, stride=stride, padding=(k - 1) // 2)
|
187 |
+
if k == 2 and stride == 1: # yolov3-tiny
|
188 |
+
modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1)))
|
189 |
+
modules.add_module('AvgPool2d', avgpool)
|
190 |
+
else:
|
191 |
+
modules = avgpool
|
192 |
+
|
193 |
+
elif mdef['type'] == 'upsample':
|
194 |
+
if ONNX_EXPORT: # explicitly state size, avoid scale_factor
|
195 |
+
g = (yolo_index + 1) * 2 / 32 # gain
|
196 |
+
modules = nn.Upsample(size=tuple(int(x * g) for x in img_size)) # img_size = (320, 192)
|
197 |
+
else:
|
198 |
+
modules = nn.Upsample(scale_factor=mdef['stride'])
|
199 |
+
|
200 |
+
elif mdef['type'] == 'route': # nn.Sequential() placeholder for 'route' layer
|
201 |
+
layers = mdef['layers']
|
202 |
+
filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])
|
203 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
204 |
+
modules = FeatureConcat(layers=layers)
|
205 |
+
|
206 |
+
elif mdef['type'] == 'route2': # nn.Sequential() placeholder for 'route' layer
|
207 |
+
layers = mdef['layers']
|
208 |
+
filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])
|
209 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
210 |
+
modules = FeatureConcat2(layers=layers)
|
211 |
+
|
212 |
+
elif mdef['type'] == 'route3': # nn.Sequential() placeholder for 'route' layer
|
213 |
+
layers = mdef['layers']
|
214 |
+
filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])
|
215 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
216 |
+
modules = FeatureConcat3(layers=layers)
|
217 |
+
|
218 |
+
elif mdef['type'] == 'route_lhalf': # nn.Sequential() placeholder for 'route' layer
|
219 |
+
layers = mdef['layers']
|
220 |
+
filters = sum([output_filters[l + 1 if l > 0 else l] for l in layers])//2
|
221 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
222 |
+
modules = FeatureConcat_l(layers=layers)
|
223 |
+
|
224 |
+
elif mdef['type'] == 'shortcut': # nn.Sequential() placeholder for 'shortcut' layer
|
225 |
+
layers = mdef['from']
|
226 |
+
filters = output_filters[-1]
|
227 |
+
routs.extend([i + l if l < 0 else l for l in layers])
|
228 |
+
modules = WeightedFeatureFusion(layers=layers, weight='weights_type' in mdef)
|
229 |
+
|
230 |
+
elif mdef['type'] == 'reorg3d': # yolov3-spp-pan-scale
|
231 |
+
pass
|
232 |
+
|
233 |
+
elif mdef['type'] == 'reorg': # yolov3-spp-pan-scale
|
234 |
+
filters = 4 * output_filters[-1]
|
235 |
+
modules.add_module('Reorg', Reorg())
|
236 |
+
|
237 |
+
elif mdef['type'] == 'dwt': # yolov3-spp-pan-scale
|
238 |
+
filters = 4 * output_filters[-1]
|
239 |
+
modules.add_module('DWT', DWT())
|
240 |
+
|
241 |
+
elif mdef['type'] == 'implicit_add': # yolov3-spp-pan-scale
|
242 |
+
filters = mdef['filters']
|
243 |
+
modules = ImplicitA(channel=filters)
|
244 |
+
|
245 |
+
elif mdef['type'] == 'implicit_mul': # yolov3-spp-pan-scale
|
246 |
+
filters = mdef['filters']
|
247 |
+
modules = ImplicitM(channel=filters)
|
248 |
+
|
249 |
+
elif mdef['type'] == 'implicit_cat': # yolov3-spp-pan-scale
|
250 |
+
filters = mdef['filters']
|
251 |
+
modules = ImplicitC(channel=filters)
|
252 |
+
|
253 |
+
elif mdef['type'] == 'implicit_add_2d': # yolov3-spp-pan-scale
|
254 |
+
channels = mdef['filters']
|
255 |
+
filters = mdef['atoms']
|
256 |
+
modules = Implicit2DA(atom=filters, channel=channels)
|
257 |
+
|
258 |
+
elif mdef['type'] == 'implicit_mul_2d': # yolov3-spp-pan-scale
|
259 |
+
channels = mdef['filters']
|
260 |
+
filters = mdef['atoms']
|
261 |
+
modules = Implicit2DM(atom=filters, channel=channels)
|
262 |
+
|
263 |
+
elif mdef['type'] == 'implicit_cat_2d': # yolov3-spp-pan-scale
|
264 |
+
channels = mdef['filters']
|
265 |
+
filters = mdef['atoms']
|
266 |
+
modules = Implicit2DC(atom=filters, channel=channels)
|
267 |
+
|
268 |
+
elif mdef['type'] == 'yolo':
|
269 |
+
yolo_index += 1
|
270 |
+
stride = [8, 16, 32, 64, 128] # P3, P4, P5, P6, P7 strides
|
271 |
+
if any(x in cfg for x in ['yolov4-tiny', 'fpn', 'yolov3']): # P5, P4, P3 strides
|
272 |
+
stride = [32, 16, 8]
|
273 |
+
layers = mdef['from'] if 'from' in mdef else []
|
274 |
+
modules = YOLOLayer(anchors=mdef['anchors'][mdef['mask']], # anchor list
|
275 |
+
nc=mdef['classes'], # number of classes
|
276 |
+
img_size=img_size, # (416, 416)
|
277 |
+
yolo_index=yolo_index, # 0, 1, 2...
|
278 |
+
layers=layers, # output layers
|
279 |
+
stride=stride[yolo_index])
|
280 |
+
|
281 |
+
# Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3)
|
282 |
+
try:
|
283 |
+
j = layers[yolo_index] if 'from' in mdef else -2
|
284 |
+
bias_ = module_list[j][0].bias # shape(255,)
|
285 |
+
bias = bias_[:modules.no * modules.na].view(modules.na, -1) # shape(3,85)
|
286 |
+
#bias[:, 4] += -4.5 # obj
|
287 |
+
bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) # obj (8 objects per 640 image)
|
288 |
+
bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) # cls (sigmoid(p) = 1/nc)
|
289 |
+
module_list[j][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad)
|
290 |
+
|
291 |
+
#j = [-2, -5, -8]
|
292 |
+
#for sj in j:
|
293 |
+
# bias_ = module_list[sj][0].bias
|
294 |
+
# bias = bias_[:modules.no * 1].view(1, -1)
|
295 |
+
# bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2)
|
296 |
+
# bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99))
|
297 |
+
# module_list[sj][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad)
|
298 |
+
except:
|
299 |
+
print('WARNING: smart bias initialization failure.')
|
300 |
+
|
301 |
+
elif mdef['type'] == 'jde':
|
302 |
+
yolo_index += 1
|
303 |
+
stride = [8, 16, 32, 64, 128] # P3, P4, P5, P6, P7 strides
|
304 |
+
if any(x in cfg for x in ['yolov4-tiny', 'fpn', 'yolov3']): # P5, P4, P3 strides
|
305 |
+
stride = [32, 16, 8]
|
306 |
+
layers = mdef['from'] if 'from' in mdef else []
|
307 |
+
modules = JDELayer(anchors=mdef['anchors'][mdef['mask']], # anchor list
|
308 |
+
nc=mdef['classes'], # number of classes
|
309 |
+
img_size=img_size, # (416, 416)
|
310 |
+
yolo_index=yolo_index, # 0, 1, 2...
|
311 |
+
layers=layers, # output layers
|
312 |
+
stride=stride[yolo_index])
|
313 |
+
|
314 |
+
# Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3)
|
315 |
+
try:
|
316 |
+
j = layers[yolo_index] if 'from' in mdef else -1
|
317 |
+
bias_ = module_list[j][0].bias # shape(255,)
|
318 |
+
bias = bias_[:modules.no * modules.na].view(modules.na, -1) # shape(3,85)
|
319 |
+
#bias[:, 4] += -4.5 # obj
|
320 |
+
bias.data[:, 4] += math.log(8 / (640 / stride[yolo_index]) ** 2) # obj (8 objects per 640 image)
|
321 |
+
bias.data[:, 5:] += math.log(0.6 / (modules.nc - 0.99)) # cls (sigmoid(p) = 1/nc)
|
322 |
+
module_list[j][0].bias = torch.nn.Parameter(bias_, requires_grad=bias_.requires_grad)
|
323 |
+
except:
|
324 |
+
print('WARNING: smart bias initialization failure.')
|
325 |
+
|
326 |
+
else:
|
327 |
+
print('Warning: Unrecognized Layer Type: ' + mdef['type'])
|
328 |
+
|
329 |
+
# Register module list and number of output filters
|
330 |
+
module_list.append(modules)
|
331 |
+
output_filters.append(filters)
|
332 |
+
|
333 |
+
routs_binary = [False] * (i + 1)
|
334 |
+
for i in routs:
|
335 |
+
routs_binary[i] = True
|
336 |
+
return module_list, routs_binary
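
# Illustrative sketch (not in the upstream file): create_modules consumes a list of parsed
# cfg blocks (dicts keyed by 'type'); the leading block holds the net hyperparameters and
# is discarded before the layers are built, e.g.
# >>> defs = [{'type': 'net'},
# ...         {'type': 'convolutional', 'batch_normalize': 1, 'filters': 32,
# ...          'size': 3, 'stride': 1, 'pad': 1, 'activation': 'leaky'}]
# >>> module_list, routs = create_modules(defs, img_size=416, cfg='yolor_csp.cfg')
# >>> len(module_list), routs
# (1, [False])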
|
337 |
+
|
338 |
+
|
339 |
+
class YOLOLayer(nn.Module):
|
340 |
+
def __init__(self, anchors, nc, img_size, yolo_index, layers, stride):
|
341 |
+
super(YOLOLayer, self).__init__()
|
342 |
+
self.anchors = torch.Tensor(anchors)
|
343 |
+
self.index = yolo_index # index of this layer in layers
|
344 |
+
self.layers = layers # model output layer indices
|
345 |
+
self.stride = stride # layer stride
|
346 |
+
self.nl = len(layers) # number of output layers (3)
|
347 |
+
self.na = len(anchors) # number of anchors (3)
|
348 |
+
self.nc = nc # number of classes (80)
|
349 |
+
self.no = nc + 5 # number of outputs (85)
|
350 |
+
self.nx, self.ny, self.ng = 0, 0, 0 # initialize number of x, y gridpoints
|
351 |
+
self.anchor_vec = self.anchors / self.stride
|
352 |
+
self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2)
|
353 |
+
|
354 |
+
if ONNX_EXPORT:
|
355 |
+
self.training = False
|
356 |
+
self.create_grids((img_size[1] // stride, img_size[0] // stride)) # number x, y grid points
|
357 |
+
|
358 |
+
def create_grids(self, ng=(13, 13), device='cpu'):
|
359 |
+
self.nx, self.ny = ng # x and y grid size
|
360 |
+
self.ng = torch.tensor(ng, dtype=torch.float)
|
361 |
+
|
362 |
+
# build xy offsets
|
363 |
+
if not self.training:
|
364 |
+
yv, xv = torch.meshgrid([torch.arange(self.ny, device=device), torch.arange(self.nx, device=device)])
|
365 |
+
self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float()
|
366 |
+
|
367 |
+
if self.anchor_vec.device != device:
|
368 |
+
self.anchor_vec = self.anchor_vec.to(device)
|
369 |
+
self.anchor_wh = self.anchor_wh.to(device)
|
370 |
+
|
371 |
+
def forward(self, p, out):
|
372 |
+
ASFF = False # https://arxiv.org/abs/1911.09516
|
373 |
+
if ASFF:
|
374 |
+
i, n = self.index, self.nl # index in layers, number of layers
|
375 |
+
p = out[self.layers[i]]
|
376 |
+
bs, _, ny, nx = p.shape # bs, 255, 13, 13
|
377 |
+
if (self.nx, self.ny) != (nx, ny):
|
378 |
+
self.create_grids((nx, ny), p.device)
|
379 |
+
|
380 |
+
# outputs and weights
|
381 |
+
# w = F.softmax(p[:, -n:], 1) # normalized weights
|
382 |
+
w = torch.sigmoid(p[:, -n:]) * (2 / n) # sigmoid weights (faster)
|
383 |
+
# w = w / w.sum(1).unsqueeze(1) # normalize across layer dimension
|
384 |
+
|
385 |
+
# weighted ASFF sum
|
386 |
+
p = out[self.layers[i]][:, :-n] * w[:, i:i + 1]
|
387 |
+
for j in range(n):
|
388 |
+
if j != i:
|
389 |
+
p += w[:, j:j + 1] * \
|
390 |
+
F.interpolate(out[self.layers[j]][:, :-n], size=[ny, nx], mode='bilinear', align_corners=False)
|
391 |
+
|
392 |
+
elif ONNX_EXPORT:
|
393 |
+
bs = 1 # batch size
|
394 |
+
else:
|
395 |
+
bs, _, ny, nx = p.shape # bs, 255, 13, 13
|
396 |
+
if (self.nx, self.ny) != (nx, ny):
|
397 |
+
self.create_grids((nx, ny), p.device)
|
398 |
+
|
399 |
+
# p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh)
|
400 |
+
p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction
|
401 |
+
|
402 |
+
if self.training:
|
403 |
+
return p
|
404 |
+
|
405 |
+
elif ONNX_EXPORT:
|
406 |
+
# Avoid broadcasting for ANE operations
|
407 |
+
m = self.na * self.nx * self.ny
|
408 |
+
ng = 1. / self.ng.repeat(m, 1)
|
409 |
+
grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2)
|
410 |
+
anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng
|
411 |
+
|
412 |
+
p = p.view(m, self.no)
|
413 |
+
xy = torch.sigmoid(p[:, 0:2]) + grid # x, y
|
414 |
+
wh = torch.exp(p[:, 2:4]) * anchor_wh # width, height
|
415 |
+
p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \
|
416 |
+
torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5]) # conf
|
417 |
+
return p_cls, xy * ng, wh
|
418 |
+
|
419 |
+
else: # inference
|
420 |
+
io = p.sigmoid()
|
421 |
+
io[..., :2] = (io[..., :2] * 2. - 0.5 + self.grid)
|
422 |
+
io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * self.anchor_wh
|
423 |
+
io[..., :4] *= self.stride
|
424 |
+
#io = p.clone() # inference output
|
425 |
+
#io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid # xy
|
426 |
+
#io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method
|
427 |
+
#io[..., :4] *= self.stride
|
428 |
+
#torch.sigmoid_(io[..., 4:])
|
429 |
+
return io.view(bs, -1, self.no), p # view [1, 3, 13, 13, 85] as [1, 507, 85]
|
430 |
+
|
431 |
+
|
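For reference, the inference branch of YOLOLayer above decodes each raw prediction into image-space boxes with the YOLOv4/v5-style formulation (sigmoid everywhere, "* 2 - 0.5" for the center and a squared "* 2" factor for the size). A minimal sketch of that decode; the tensor names here are hypothetical and only mirror the attributes used above, this is not an API of the module:

import torch

def decode_boxes(p, grid, anchor_wh, stride):
    # p: (bs, na, ny, nx, no) raw predictions; grid: (1, 1, ny, nx, 2); anchor_wh: (1, na, 1, 1, 2) in grid units
    io = p.sigmoid()
    io[..., :2] = io[..., :2] * 2. - 0.5 + grid          # xy offset within (and slightly around) its grid cell
    io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * anchor_wh   # wh as a bounded multiple of the anchor
    io[..., :4] *= stride                                # grid units -> input-image pixels
    return io.view(io.shape[0], -1, io.shape[-1])        # (bs, na*ny*nx, no)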
432 |
+
class JDELayer(nn.Module):
|
433 |
+
def __init__(self, anchors, nc, img_size, yolo_index, layers, stride):
|
434 |
+
super(JDELayer, self).__init__()
|
435 |
+
self.anchors = torch.Tensor(anchors)
|
436 |
+
self.index = yolo_index # index of this layer in layers
|
437 |
+
self.layers = layers # model output layer indices
|
438 |
+
self.stride = stride # layer stride
|
439 |
+
self.nl = len(layers) # number of output layers (3)
|
440 |
+
self.na = len(anchors) # number of anchors (3)
|
441 |
+
self.nc = nc # number of classes (80)
|
442 |
+
self.no = nc + 5 # number of outputs (85)
|
443 |
+
self.nx, self.ny, self.ng = 0, 0, 0 # initialize number of x, y gridpoints
|
444 |
+
self.anchor_vec = self.anchors / self.stride
|
445 |
+
self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2)
|
446 |
+
|
447 |
+
if ONNX_EXPORT:
|
448 |
+
self.training = False
|
449 |
+
self.create_grids((img_size[1] // stride, img_size[0] // stride)) # number x, y grid points
|
450 |
+
|
451 |
+
def create_grids(self, ng=(13, 13), device='cpu'):
|
452 |
+
self.nx, self.ny = ng # x and y grid size
|
453 |
+
self.ng = torch.tensor(ng, dtype=torch.float)
|
454 |
+
|
455 |
+
# build xy offsets
|
456 |
+
if not self.training:
|
457 |
+
yv, xv = torch.meshgrid([torch.arange(self.ny, device=device), torch.arange(self.nx, device=device)])
|
458 |
+
self.grid = torch.stack((xv, yv), 2).view((1, 1, self.ny, self.nx, 2)).float()
|
459 |
+
|
460 |
+
if self.anchor_vec.device != device:
|
461 |
+
self.anchor_vec = self.anchor_vec.to(device)
|
462 |
+
self.anchor_wh = self.anchor_wh.to(device)
|
463 |
+
|
464 |
+
def forward(self, p, out):
|
465 |
+
ASFF = False # https://arxiv.org/abs/1911.09516
|
466 |
+
if ASFF:
|
467 |
+
i, n = self.index, self.nl # index in layers, number of layers
|
468 |
+
p = out[self.layers[i]]
|
469 |
+
bs, _, ny, nx = p.shape # bs, 255, 13, 13
|
470 |
+
if (self.nx, self.ny) != (nx, ny):
|
471 |
+
self.create_grids((nx, ny), p.device)
|
472 |
+
|
473 |
+
# outputs and weights
|
474 |
+
# w = F.softmax(p[:, -n:], 1) # normalized weights
|
475 |
+
w = torch.sigmoid(p[:, -n:]) * (2 / n) # sigmoid weights (faster)
|
476 |
+
# w = w / w.sum(1).unsqueeze(1) # normalize across layer dimension
|
477 |
+
|
478 |
+
# weighted ASFF sum
|
479 |
+
p = out[self.layers[i]][:, :-n] * w[:, i:i + 1]
|
480 |
+
for j in range(n):
|
481 |
+
if j != i:
|
482 |
+
p += w[:, j:j + 1] * \
|
483 |
+
F.interpolate(out[self.layers[j]][:, :-n], size=[ny, nx], mode='bilinear', align_corners=False)
|
484 |
+
|
485 |
+
elif ONNX_EXPORT:
|
486 |
+
bs = 1 # batch size
|
487 |
+
else:
|
488 |
+
bs, _, ny, nx = p.shape # bs, 255, 13, 13
|
489 |
+
if (self.nx, self.ny) != (nx, ny):
|
490 |
+
self.create_grids((nx, ny), p.device)
|
491 |
+
|
492 |
+
# p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh)
|
493 |
+
p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction
|
494 |
+
|
495 |
+
if self.training:
|
496 |
+
return p
|
497 |
+
|
498 |
+
elif ONNX_EXPORT:
|
499 |
+
# Avoid broadcasting for ANE operations
|
500 |
+
m = self.na * self.nx * self.ny
|
501 |
+
ng = 1. / self.ng.repeat(m, 1)
|
502 |
+
grid = self.grid.repeat(1, self.na, 1, 1, 1).view(m, 2)
|
503 |
+
anchor_wh = self.anchor_wh.repeat(1, 1, self.nx, self.ny, 1).view(m, 2) * ng
|
504 |
+
|
505 |
+
p = p.view(m, self.no)
|
506 |
+
xy = torch.sigmoid(p[:, 0:2]) + grid # x, y
|
507 |
+
wh = torch.exp(p[:, 2:4]) * anchor_wh # width, height
|
508 |
+
p_cls = torch.sigmoid(p[:, 4:5]) if self.nc == 1 else \
|
509 |
+
torch.sigmoid(p[:, 5:self.no]) * torch.sigmoid(p[:, 4:5]) # conf
|
510 |
+
return p_cls, xy * ng, wh
|
511 |
+
|
512 |
+
else: # inference
|
513 |
+
#io = p.sigmoid()
|
514 |
+
#io[..., :2] = (io[..., :2] * 2. - 0.5 + self.grid)
|
515 |
+
#io[..., 2:4] = (io[..., 2:4] * 2) ** 2 * self.anchor_wh
|
516 |
+
#io[..., :4] *= self.stride
|
517 |
+
io = p.clone() # inference output
|
518 |
+
io[..., :2] = torch.sigmoid(io[..., :2]) * 2. - 0.5 + self.grid # xy
|
519 |
+
io[..., 2:4] = (torch.sigmoid(io[..., 2:4]) * 2) ** 2 * self.anchor_wh # wh yolo method
|
520 |
+
io[..., :4] *= self.stride
|
521 |
+
io[..., 4:] = F.softmax(io[..., 4:], dim=-1)  # normalize scores over the last dim (explicit dim avoids PyTorch's implicit-dim warning)
|
522 |
+
return io.view(bs, -1, self.no), p # view [1, 3, 13, 13, 85] as [1, 507, 85]
|
523 |
+
|
524 |
+
class Darknet(nn.Module):
|
525 |
+
# YOLOv3 object detection model
|
526 |
+
|
527 |
+
def __init__(self, cfg, img_size=(416, 416), verbose=False):
|
528 |
+
super(Darknet, self).__init__()
|
529 |
+
|
530 |
+
self.module_defs = parse_model_cfg(cfg)
|
531 |
+
self.module_list, self.routs = create_modules(self.module_defs, img_size, cfg)
|
532 |
+
self.yolo_layers = get_yolo_layers(self)
|
533 |
+
# torch_utils.initialize_weights(self)
|
534 |
+
|
535 |
+
# Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
|
536 |
+
self.version = np.array([0, 2, 5], dtype=np.int32) # (int32) version info: major, minor, revision
|
537 |
+
self.seen = np.array([0], dtype=np.int64) # (int64) number of images seen during training
|
538 |
+
self.info(verbose) if not ONNX_EXPORT else None # print model description
|
539 |
+
|
540 |
+
def forward(self, x, augment=False, verbose=False):
|
541 |
+
|
542 |
+
if not augment:
|
543 |
+
return self.forward_once(x)
|
544 |
+
else: # Augment images (inference and test only) https://github.com/ultralytics/yolov3/issues/931
|
545 |
+
img_size = x.shape[-2:] # height, width
|
546 |
+
s = [0.83, 0.67] # scales
|
547 |
+
y = []
|
548 |
+
for i, xi in enumerate((x,
|
549 |
+
torch_utils.scale_img(x.flip(3), s[0], same_shape=False), # flip-lr and scale
|
550 |
+
torch_utils.scale_img(x, s[1], same_shape=False), # scale
|
551 |
+
)):
|
552 |
+
# cv2.imwrite('img%g.jpg' % i, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1])
|
553 |
+
y.append(self.forward_once(xi)[0])
|
554 |
+
|
555 |
+
y[1][..., :4] /= s[0] # scale
|
556 |
+
y[1][..., 0] = img_size[1] - y[1][..., 0] # flip lr
|
557 |
+
y[2][..., :4] /= s[1] # scale
|
558 |
+
|
559 |
+
# for i, yi in enumerate(y): # coco small, medium, large = < 32**2 < 96**2 <
|
560 |
+
# area = yi[..., 2:4].prod(2)[:, :, None]
|
561 |
+
# if i == 1:
|
562 |
+
# yi *= (area < 96. ** 2).float()
|
563 |
+
# elif i == 2:
|
564 |
+
# yi *= (area > 32. ** 2).float()
|
565 |
+
# y[i] = yi
|
566 |
+
|
567 |
+
y = torch.cat(y, 1)
|
568 |
+
return y, None
|
569 |
+
|
570 |
+
def forward_once(self, x, augment=False, verbose=False):
|
571 |
+
img_size = x.shape[-2:] # height, width
|
572 |
+
yolo_out, out = [], []
|
573 |
+
if verbose:
|
574 |
+
print('0', x.shape)
|
575 |
+
str = ''
|
576 |
+
|
577 |
+
# Augment images (inference and test only)
|
578 |
+
if augment: # https://github.com/ultralytics/yolov3/issues/931
|
579 |
+
nb = x.shape[0] # batch size
|
580 |
+
s = [0.83, 0.67] # scales
|
581 |
+
x = torch.cat((x,
|
582 |
+
torch_utils.scale_img(x.flip(3), s[0]), # flip-lr and scale
|
583 |
+
torch_utils.scale_img(x, s[1]), # scale
|
584 |
+
), 0)
|
585 |
+
|
586 |
+
for i, module in enumerate(self.module_list):
|
587 |
+
name = module.__class__.__name__
|
588 |
+
#print(name)
|
589 |
+
if name in ['WeightedFeatureFusion', 'FeatureConcat', 'FeatureConcat2', 'FeatureConcat3', 'FeatureConcat_l', 'ScaleChannel', 'ShiftChannel', 'ShiftChannel2D', 'ControlChannel', 'ControlChannel2D', 'AlternateChannel', 'AlternateChannel2D', 'SelectChannel', 'SelectChannel2D', 'ScaleSpatial']: # sum, concat
|
590 |
+
if verbose:
|
591 |
+
l = [i - 1] + module.layers # layers
|
592 |
+
sh = [list(x.shape)] + [list(out[i].shape) for i in module.layers] # shapes
|
593 |
+
str = ' >> ' + ' + '.join(['layer %g %s' % x for x in zip(l, sh)])
|
594 |
+
x = module(x, out) # WeightedFeatureFusion(), FeatureConcat()
|
595 |
+
elif name in ['ImplicitA', 'ImplicitM', 'ImplicitC', 'Implicit2DA', 'Implicit2DM', 'Implicit2DC']:
|
596 |
+
x = module()
|
597 |
+
elif name == 'YOLOLayer':
|
598 |
+
yolo_out.append(module(x, out))
|
599 |
+
elif name == 'JDELayer':
|
600 |
+
yolo_out.append(module(x, out))
|
601 |
+
else: # run module directly, i.e. mtype = 'convolutional', 'upsample', 'maxpool', 'batchnorm2d' etc.
|
602 |
+
#print(module)
|
603 |
+
#print(x.shape)
|
604 |
+
x = module(x)
|
605 |
+
|
606 |
+
out.append(x if self.routs[i] else [])
|
607 |
+
if verbose:
|
608 |
+
print('%g/%g %s -' % (i, len(self.module_list), name), list(x.shape), str)
|
609 |
+
str = ''
|
610 |
+
|
611 |
+
if self.training: # train
|
612 |
+
return yolo_out
|
613 |
+
elif ONNX_EXPORT: # export
|
614 |
+
x = [torch.cat(x, 0) for x in zip(*yolo_out)]
|
615 |
+
return x[0], torch.cat(x[1:3], 1) # scores, boxes: 3780x80, 3780x4
|
616 |
+
else: # inference or test
|
617 |
+
x, p = zip(*yolo_out) # inference output, training output
|
618 |
+
x = torch.cat(x, 1) # cat yolo outputs
|
619 |
+
if augment: # de-augment results
|
620 |
+
x = torch.split(x, nb, dim=0)
|
621 |
+
x[1][..., :4] /= s[0] # scale
|
622 |
+
x[1][..., 0] = img_size[1] - x[1][..., 0] # flip lr
|
623 |
+
x[2][..., :4] /= s[1] # scale
|
624 |
+
x = torch.cat(x, 1)
|
625 |
+
return x, p
|
626 |
+
|
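A minimal usage sketch for the Darknet wrapper above. The cfg path is assumed to match the YOLOR cfg files added alongside this module and the weights line is a placeholder, so treat this as an illustration rather than the package's documented entry point:

import torch

model = Darknet('asone/detectors/yolor/cfg/yolor_csp.cfg', img_size=(640, 640))
# load_darknet_weights(model, 'yolor_csp.weights')   # optional: load a raw Darknet *.weights file (placeholder path)
model.eval()
with torch.no_grad():
    img = torch.zeros(1, 3, 640, 640)                 # dummy normalized RGB input
    pred, _ = model(img)                              # pred: (1, total_anchors, nc + 5) in eval mode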
627 |
+
def fuse(self):
|
628 |
+
# Fuse Conv2d + BatchNorm2d layers throughout model
|
629 |
+
print('Fusing layers...')
|
630 |
+
fused_list = nn.ModuleList()
|
631 |
+
for a in list(self.children())[0]:
|
632 |
+
if isinstance(a, nn.Sequential):
|
633 |
+
for i, b in enumerate(a):
|
634 |
+
if isinstance(b, nn.modules.batchnorm.BatchNorm2d):
|
635 |
+
# fuse this bn layer with the previous conv2d layer
|
636 |
+
conv = a[i - 1]
|
637 |
+
fused = torch_utils.fuse_conv_and_bn(conv, b)
|
638 |
+
a = nn.Sequential(fused, *list(a.children())[i + 1:])
|
639 |
+
break
|
640 |
+
fused_list.append(a)
|
641 |
+
self.module_list = fused_list
|
642 |
+
self.info() if not ONNX_EXPORT else None # yolov3-spp reduced from 225 to 152 layers
|
643 |
+
|
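The fuse() method above relies on torch_utils.fuse_conv_and_bn to fold each BatchNorm2d into the Conv2d that precedes it, which keeps inference output identical while removing the BN op. A standalone sketch of that folding, illustrative only; the repository's own helper may differ in details:

import torch
import torch.nn as nn

def fuse_conv_and_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    # illustrative standalone version of the fusion math, not the repo's torch_utils implementation
    fused = nn.Conv2d(conv.in_channels, conv.out_channels, conv.kernel_size,
                      stride=conv.stride, padding=conv.padding, groups=conv.groups, bias=True)
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)   # gamma / sqrt(var + eps), per output channel
    with torch.no_grad():
        fused.weight.copy_(conv.weight * scale.reshape(-1, 1, 1, 1))
        conv_bias = conv.bias if conv.bias is not None else torch.zeros(conv.out_channels)
        fused.bias.copy_(bn.bias + (conv_bias - bn.running_mean) * scale)
    return fused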
644 |
+
def info(self, verbose=False):
|
645 |
+
torch_utils.model_info(self, verbose)
|
646 |
+
|
647 |
+
|
648 |
+
def get_yolo_layers(model):
|
649 |
+
return [i for i, m in enumerate(model.module_list) if m.__class__.__name__ in ['YOLOLayer', 'JDELayer']] # [89, 101, 113]
|
650 |
+
|
651 |
+
|
652 |
+
def load_darknet_weights(self, weights, cutoff=-1):
|
653 |
+
# Parses and loads the weights stored in 'weights'
|
654 |
+
|
655 |
+
# Establish cutoffs (load layers between 0 and cutoff. if cutoff = -1 all are loaded)
|
656 |
+
file = Path(weights).name
|
657 |
+
if file == 'darknet53.conv.74':
|
658 |
+
cutoff = 75
|
659 |
+
elif file == 'yolov3-tiny.conv.15':
|
660 |
+
cutoff = 15
|
661 |
+
|
662 |
+
# Read weights file
|
663 |
+
with open(weights, 'rb') as f:
|
664 |
+
# Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
|
665 |
+
self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision
|
666 |
+
self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training
|
667 |
+
|
668 |
+
weights = np.fromfile(f, dtype=np.float32) # the rest are weights
|
669 |
+
|
670 |
+
ptr = 0
|
671 |
+
for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
|
672 |
+
if mdef['type'] == 'convolutional':
|
673 |
+
conv = module[0]
|
674 |
+
if mdef['batch_normalize']:
|
675 |
+
# Load BN bias, weights, running mean and running variance
|
676 |
+
bn = module[1]
|
677 |
+
nb = bn.bias.numel() # number of biases
|
678 |
+
# Bias
|
679 |
+
bn.bias.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.bias))
|
680 |
+
ptr += nb
|
681 |
+
# Weight
|
682 |
+
bn.weight.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.weight))
|
683 |
+
ptr += nb
|
684 |
+
# Running Mean
|
685 |
+
bn.running_mean.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.running_mean))
|
686 |
+
ptr += nb
|
687 |
+
# Running Var
|
688 |
+
bn.running_var.data.copy_(torch.from_numpy(weights[ptr:ptr + nb]).view_as(bn.running_var))
|
689 |
+
ptr += nb
|
690 |
+
else:
|
691 |
+
# Load conv. bias
|
692 |
+
nb = conv.bias.numel()
|
693 |
+
conv_b = torch.from_numpy(weights[ptr:ptr + nb]).view_as(conv.bias)
|
694 |
+
conv.bias.data.copy_(conv_b)
|
695 |
+
ptr += nb
|
696 |
+
# Load conv. weights
|
697 |
+
nw = conv.weight.numel() # number of weights
|
698 |
+
conv.weight.data.copy_(torch.from_numpy(weights[ptr:ptr + nw]).view_as(conv.weight))
|
699 |
+
ptr += nw
|
700 |
+
|
701 |
+
|
702 |
+
def save_weights(self, path='model.weights', cutoff=-1):
|
703 |
+
# Converts a PyTorch model to Darknet format (*.pt to *.weights)
|
704 |
+
# Note: Does not work if model.fuse() is applied
|
705 |
+
with open(path, 'wb') as f:
|
706 |
+
# Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
|
707 |
+
self.version.tofile(f) # (int32) version info: major, minor, revision
|
708 |
+
self.seen.tofile(f) # (int64) number of images seen during training
|
709 |
+
|
710 |
+
# Iterate through layers
|
711 |
+
for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
|
712 |
+
if mdef['type'] == 'convolutional':
|
713 |
+
conv_layer = module[0]
|
714 |
+
# If batch norm, load bn first
|
715 |
+
if mdef['batch_normalize']:
|
716 |
+
bn_layer = module[1]
|
717 |
+
bn_layer.bias.data.cpu().numpy().tofile(f)
|
718 |
+
bn_layer.weight.data.cpu().numpy().tofile(f)
|
719 |
+
bn_layer.running_mean.data.cpu().numpy().tofile(f)
|
720 |
+
bn_layer.running_var.data.cpu().numpy().tofile(f)
|
721 |
+
# Load conv bias
|
722 |
+
else:
|
723 |
+
conv_layer.bias.data.cpu().numpy().tofile(f)
|
724 |
+
# Load conv weights
|
725 |
+
conv_layer.weight.data.cpu().numpy().tofile(f)
|
726 |
+
|
727 |
+
|
728 |
+
def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights', saveto='converted.weights'):
|
729 |
+
# Converts between PyTorch and Darknet format per extension (i.e. *.weights convert to *.pt and vice versa)
|
730 |
+
# from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights')
|
731 |
+
|
732 |
+
# Initialize model
|
733 |
+
model = Darknet(cfg)
|
734 |
+
ckpt = torch.load(weights) # load checkpoint
|
735 |
+
try:
|
736 |
+
ckpt['model'] = {k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
|
737 |
+
model.load_state_dict(ckpt['model'], strict=False)
|
738 |
+
save_weights(model, path=saveto, cutoff=-1)
|
739 |
+
except KeyError as e:
|
740 |
+
print(e)
|
741 |
+
|
742 |
+
def attempt_download(weights):
|
743 |
+
# Attempt to download pretrained weights if not found locally
|
744 |
+
weights = weights.strip()
|
745 |
+
msg = weights + ' missing, try downloading from https://drive.google.com/open?id=1LezFG5g3BCW6iYaV89B2i64cqEUZD7e0'
|
746 |
+
|
747 |
+
if len(weights) > 0 and not os.path.isfile(weights):
|
748 |
+
d = {}  # optional map of filename -> Google Drive file id (empty here, so downloads fall through to pjreddie.com)
|
749 |
+
|
750 |
+
file = Path(weights).name
|
751 |
+
if file in d:
|
752 |
+
r = gdrive_download(id=d[file], name=weights)
|
753 |
+
else: # download from pjreddie.com
|
754 |
+
url = 'https://pjreddie.com/media/files/' + file
|
755 |
+
print('Downloading ' + url)
|
756 |
+
r = os.system('curl -f ' + url + ' -o ' + weights)
|
757 |
+
|
758 |
+
# Error check
|
759 |
+
if not (r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6): # weights exist and > 1MB
|
760 |
+
os.system('rm ' + weights) # remove partial downloads
|
761 |
+
raise Exception(msg)
|
asone/detectors/yolor/utils/__init__.py
ADDED
@@ -0,0 +1 @@
+
asone/detectors/yolor/utils/activations.py
ADDED
@@ -0,0 +1,72 @@
+# Activation functions
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+# Swish https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------
+class Swish(nn.Module):  #
+    @staticmethod
+    def forward(x):
+        return x * torch.sigmoid(x)
+
+
+class Hardswish(nn.Module):  # export-friendly version of nn.Hardswish()
+    @staticmethod
+    def forward(x):
+        # return x * F.hardsigmoid(x)  # for torchscript and CoreML
+        return x * F.hardtanh(x + 3, 0., 6.) / 6.  # for torchscript, CoreML and ONNX
+
+
+class MemoryEfficientSwish(nn.Module):
+    class F(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, x):
+            ctx.save_for_backward(x)
+            return x * torch.sigmoid(x)
+
+        @staticmethod
+        def backward(ctx, grad_output):
+            x = ctx.saved_tensors[0]
+            sx = torch.sigmoid(x)
+            return grad_output * (sx * (1 + x * (1 - sx)))
+
+    def forward(self, x):
+        return self.F.apply(x)
+
+
+# Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
+class Mish(nn.Module):
+    @staticmethod
+    def forward(x):
+        return x * F.softplus(x).tanh()
+
+
+class MemoryEfficientMish(nn.Module):
+    class F(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, x):
+            ctx.save_for_backward(x)
+            return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
+
+        @staticmethod
+        def backward(ctx, grad_output):
+            x = ctx.saved_tensors[0]
+            sx = torch.sigmoid(x)
+            fx = F.softplus(x).tanh()
+            return grad_output * (fx + x * sx * (1 - fx * fx))
+
+    def forward(self, x):
+        return self.F.apply(x)
+
+
+# FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
+class FReLU(nn.Module):
+    def __init__(self, c1, k=3):  # ch_in, kernel
+        super().__init__()
+        self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1)
+        self.bn = nn.BatchNorm2d(c1)
+
+    def forward(self, x):
+        return torch.max(x, self.bn(self.conv(x)))
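A quick sanity check of the Mish activation defined in this file; the sample values are arbitrary and only show the elementwise x * tanh(softplus(x)) behavior:

import torch

act = Mish()
x = torch.linspace(-3., 3., steps=5)   # arbitrary sample values
print(act(x))                          # elementwise x * tanh(softplus(x))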
asone/detectors/yolor/utils/autoanchor.py
ADDED
@@ -0,0 +1,152 @@
1 |
+
# Auto-anchor utils
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
import yaml
|
6 |
+
from scipy.cluster.vq import kmeans
|
7 |
+
from tqdm import tqdm
|
8 |
+
|
9 |
+
|
10 |
+
def check_anchor_order(m):
|
11 |
+
# Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
|
12 |
+
a = m.anchor_grid.prod(-1).view(-1) # anchor area
|
13 |
+
da = a[-1] - a[0] # delta a
|
14 |
+
ds = m.stride[-1] - m.stride[0] # delta s
|
15 |
+
if da.sign() != ds.sign(): # anchor order and stride order disagree
|
16 |
+
print('Reversing anchor order')
|
17 |
+
m.anchors[:] = m.anchors.flip(0)
|
18 |
+
m.anchor_grid[:] = m.anchor_grid.flip(0)
|
19 |
+
|
20 |
+
|
21 |
+
def check_anchors(dataset, model, thr=4.0, imgsz=640):
|
22 |
+
# Check anchor fit to data, recompute if necessary
|
23 |
+
print('\nAnalyzing anchors... ', end='')
|
24 |
+
m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect()
|
25 |
+
shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
|
26 |
+
scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale
|
27 |
+
wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh
|
28 |
+
|
29 |
+
def metric(k): # compute metric
|
30 |
+
r = wh[:, None] / k[None]
|
31 |
+
x = torch.min(r, 1. / r).min(2)[0] # ratio metric
|
32 |
+
best = x.max(1)[0] # best_x
|
33 |
+
aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold
|
34 |
+
bpr = (best > 1. / thr).float().mean() # best possible recall
|
35 |
+
return bpr, aat
|
36 |
+
|
37 |
+
bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2))
|
38 |
+
print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='')
|
39 |
+
if bpr < 0.98: # threshold to recompute
|
40 |
+
print('. Attempting to improve anchors, please wait...')
|
41 |
+
na = m.anchor_grid.numel() // 2 # number of anchors
|
42 |
+
new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
|
43 |
+
new_bpr = metric(new_anchors.reshape(-1, 2))[0]
|
44 |
+
if new_bpr > bpr: # replace anchors
|
45 |
+
new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors)
|
46 |
+
m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference
|
47 |
+
m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss
|
48 |
+
check_anchor_order(m)
|
49 |
+
print('New anchors saved to model. Update model *.yaml to use these anchors in the future.')
|
50 |
+
else:
|
51 |
+
print('Original anchors better than new anchors. Proceeding with original anchors.')
|
52 |
+
print('') # newline
|
53 |
+
|
54 |
+
|
55 |
+
def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
|
56 |
+
""" Creates kmeans-evolved anchors from training dataset
|
57 |
+
|
58 |
+
Arguments:
|
59 |
+
path: path to dataset *.yaml, or a loaded dataset
|
60 |
+
n: number of anchors
|
61 |
+
img_size: image size used for training
|
62 |
+
thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
|
63 |
+
gen: generations to evolve anchors using genetic algorithm
|
64 |
+
verbose: print all results
|
65 |
+
|
66 |
+
Return:
|
67 |
+
k: kmeans evolved anchors
|
68 |
+
|
69 |
+
Usage:
|
70 |
+
from utils.general import *; _ = kmean_anchors()
|
71 |
+
"""
|
72 |
+
thr = 1. / thr
|
73 |
+
|
74 |
+
def metric(k, wh): # compute metrics
|
75 |
+
r = wh[:, None] / k[None]
|
76 |
+
x = torch.min(r, 1. / r).min(2)[0] # ratio metric
|
77 |
+
# x = wh_iou(wh, torch.tensor(k)) # iou metric
|
78 |
+
return x, x.max(1)[0] # x, best_x
|
79 |
+
|
80 |
+
def anchor_fitness(k): # mutation fitness
|
81 |
+
_, best = metric(torch.tensor(k, dtype=torch.float32), wh)
|
82 |
+
return (best * (best > thr).float()).mean() # fitness
|
83 |
+
|
84 |
+
def print_results(k):
|
85 |
+
k = k[np.argsort(k.prod(1))] # sort small to large
|
86 |
+
x, best = metric(k, wh0)
|
87 |
+
bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr
|
88 |
+
print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat))
|
89 |
+
print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' %
|
90 |
+
(n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='')
|
91 |
+
for i, x in enumerate(k):
|
92 |
+
print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg
|
93 |
+
return k
|
94 |
+
|
95 |
+
if isinstance(path, str): # *.yaml file
|
96 |
+
with open(path) as f:
|
97 |
+
data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
|
98 |
+
from utils.datasets import LoadImagesAndLabels
|
99 |
+
dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
|
100 |
+
else:
|
101 |
+
dataset = path # dataset
|
102 |
+
|
103 |
+
# Get label wh
|
104 |
+
shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
|
105 |
+
wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
|
106 |
+
|
107 |
+
# Filter
|
108 |
+
i = (wh0 < 3.0).any(1).sum()
|
109 |
+
if i:
|
110 |
+
print('WARNING: Extremely small objects found. '
|
111 |
+
'%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0)))
|
112 |
+
wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels
|
113 |
+
|
114 |
+
# Kmeans calculation
|
115 |
+
print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))
|
116 |
+
s = wh.std(0) # sigmas for whitening
|
117 |
+
k, dist = kmeans(wh / s, n, iter=30) # points, mean distance
|
118 |
+
k *= s
|
119 |
+
wh = torch.tensor(wh, dtype=torch.float32) # filtered
|
120 |
+
wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered
|
121 |
+
k = print_results(k)
|
122 |
+
|
123 |
+
# Plot
|
124 |
+
# k, d = [None] * 20, [None] * 20
|
125 |
+
# for i in tqdm(range(1, 21)):
|
126 |
+
# k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance
|
127 |
+
# fig, ax = plt.subplots(1, 2, figsize=(14, 7))
|
128 |
+
# ax = ax.ravel()
|
129 |
+
# ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
|
130 |
+
# fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh
|
131 |
+
# ax[0].hist(wh[wh[:, 0]<100, 0],400)
|
132 |
+
# ax[1].hist(wh[wh[:, 1]<100, 1],400)
|
133 |
+
# fig.tight_layout()
|
134 |
+
# fig.savefig('wh.png', dpi=200)
|
135 |
+
|
136 |
+
# Evolve
|
137 |
+
npr = np.random
|
138 |
+
f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma
|
139 |
+
pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar
|
140 |
+
for _ in pbar:
|
141 |
+
v = np.ones(sh)
|
142 |
+
while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
|
143 |
+
v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
|
144 |
+
kg = (k.copy() * v).clip(min=2.0)
|
145 |
+
fg = anchor_fitness(kg)
|
146 |
+
if fg > f:
|
147 |
+
f, k = fg, kg.copy()
|
148 |
+
pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f
|
149 |
+
if verbose:
|
150 |
+
print_results(k)
|
151 |
+
|
152 |
+
return print_results(k)
|
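Both check_anchors() and kmean_anchors() above score anchors with the same width/height ratio metric: for each label box, take the per-dimension ratio against every anchor, keep the worse of ratio and its reciprocal, and call the box "covered" if its best anchor stays above 1/thr. A toy numeric illustration of that metric (numbers invented for the example):

import torch

wh = torch.tensor([[30., 60.]])                           # one ground-truth box (w, h) in pixels, toy value
k = torch.tensor([[10., 13.], [33., 23.], [62., 45.]])    # candidate anchors, toy values
r = wh[:, None] / k[None]                                 # per-dimension ratios, shape (1, 3, 2)
x = torch.min(r, 1. / r).min(2)[0]                        # worst-dimension ratio per anchor
best = x.max(1)[0]                                        # best anchor for this box (~0.48 here)
print(best > 1. / 4.0)                                    # True -> box counts toward BPR at thr=4.0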
asone/detectors/yolor/utils/datasets.py
ADDED
@@ -0,0 +1,1297 @@
1 |
+
# Dataset utils and dataloaders
|
2 |
+
|
3 |
+
import glob
|
4 |
+
import math
|
5 |
+
import os
|
6 |
+
import random
|
7 |
+
import shutil
|
8 |
+
import time
|
9 |
+
from itertools import repeat
|
10 |
+
from multiprocessing.pool import ThreadPool
|
11 |
+
from pathlib import Path
|
12 |
+
from threading import Thread
|
13 |
+
|
14 |
+
import cv2
|
15 |
+
import numpy as np
|
16 |
+
import torch
|
17 |
+
from PIL import Image, ExifTags
|
18 |
+
from torch.utils.data import Dataset
|
19 |
+
from tqdm import tqdm
|
20 |
+
|
21 |
+
import pickle
|
22 |
+
from copy import deepcopy
|
23 |
+
from pycocotools import mask as maskUtils
|
24 |
+
from torchvision.utils import save_image
|
25 |
+
|
26 |
+
from asone.detectors.yolor.utils.general import xyxy2xywh, xywh2xyxy
|
27 |
+
from asone.detectors.yolor.utils.torch_utils import torch_distributed_zero_first
|
28 |
+
|
29 |
+
# Parameters
|
30 |
+
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
|
31 |
+
img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixes
|
32 |
+
vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes
|
33 |
+
|
34 |
+
# Get orientation exif tag
|
35 |
+
for orientation in ExifTags.TAGS.keys():
|
36 |
+
if ExifTags.TAGS[orientation] == 'Orientation':
|
37 |
+
break
|
38 |
+
|
39 |
+
|
40 |
+
def get_hash(files):
|
41 |
+
# Returns a single hash value of a list of files
|
42 |
+
return sum(os.path.getsize(f) for f in files if os.path.isfile(f))
|
43 |
+
|
44 |
+
|
45 |
+
def exif_size(img):
|
46 |
+
# Returns exif-corrected PIL size
|
47 |
+
s = img.size # (width, height)
|
48 |
+
try:
|
49 |
+
rotation = dict(img._getexif().items())[orientation]
|
50 |
+
if rotation == 6: # rotation 270
|
51 |
+
s = (s[1], s[0])
|
52 |
+
elif rotation == 8: # rotation 90
|
53 |
+
s = (s[1], s[0])
|
54 |
+
except:
|
55 |
+
pass
|
56 |
+
|
57 |
+
return s
|
58 |
+
|
59 |
+
|
60 |
+
def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
|
61 |
+
rank=-1, world_size=1, workers=8):
|
62 |
+
# Make sure only the first process in DDP process the dataset first, and the following others can use the cache
|
63 |
+
with torch_distributed_zero_first(rank):
|
64 |
+
dataset = LoadImagesAndLabels(path, imgsz, batch_size,
|
65 |
+
augment=augment, # augment images
|
66 |
+
hyp=hyp, # augmentation hyperparameters
|
67 |
+
rect=rect, # rectangular training
|
68 |
+
cache_images=cache,
|
69 |
+
single_cls=opt.single_cls,
|
70 |
+
stride=int(stride),
|
71 |
+
pad=pad,
|
72 |
+
rank=rank)
|
73 |
+
|
74 |
+
batch_size = min(batch_size, len(dataset))
|
75 |
+
nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers
|
76 |
+
sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
|
77 |
+
dataloader = InfiniteDataLoader(dataset,
|
78 |
+
batch_size=batch_size,
|
79 |
+
num_workers=nw,
|
80 |
+
sampler=sampler,
|
81 |
+
pin_memory=True,
|
82 |
+
collate_fn=LoadImagesAndLabels.collate_fn) # torch.utils.data.DataLoader()
|
83 |
+
return dataloader, dataset
|
84 |
+
|
85 |
+
|
86 |
+
def create_dataloader9(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
|
87 |
+
rank=-1, world_size=1, workers=8):
|
88 |
+
# Make sure only the first process in DDP process the dataset first, and the following others can use the cache
|
89 |
+
with torch_distributed_zero_first(rank):
|
90 |
+
dataset = LoadImagesAndLabels9(path, imgsz, batch_size,
|
91 |
+
augment=augment, # augment images
|
92 |
+
hyp=hyp, # augmentation hyperparameters
|
93 |
+
rect=rect, # rectangular training
|
94 |
+
cache_images=cache,
|
95 |
+
single_cls=opt.single_cls,
|
96 |
+
stride=int(stride),
|
97 |
+
pad=pad,
|
98 |
+
rank=rank)
|
99 |
+
|
100 |
+
batch_size = min(batch_size, len(dataset))
|
101 |
+
nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers
|
102 |
+
sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
|
103 |
+
dataloader = InfiniteDataLoader(dataset,
|
104 |
+
batch_size=batch_size,
|
105 |
+
num_workers=nw,
|
106 |
+
sampler=sampler,
|
107 |
+
pin_memory=True,
|
108 |
+
collate_fn=LoadImagesAndLabels9.collate_fn) # torch.utils.data.DataLoader()
|
109 |
+
return dataloader, dataset
|
110 |
+
|
111 |
+
|
112 |
+
class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
|
113 |
+
""" Dataloader that reuses workers
|
114 |
+
|
115 |
+
Uses same syntax as vanilla DataLoader
|
116 |
+
"""
|
117 |
+
|
118 |
+
def __init__(self, *args, **kwargs):
|
119 |
+
super().__init__(*args, **kwargs)
|
120 |
+
object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
|
121 |
+
self.iterator = super().__iter__()
|
122 |
+
|
123 |
+
def __len__(self):
|
124 |
+
return len(self.batch_sampler.sampler)
|
125 |
+
|
126 |
+
def __iter__(self):
|
127 |
+
for i in range(len(self)):
|
128 |
+
yield next(self.iterator)
|
129 |
+
|
130 |
+
|
131 |
+
class _RepeatSampler(object):
|
132 |
+
""" Sampler that repeats forever
|
133 |
+
|
134 |
+
Args:
|
135 |
+
sampler (Sampler)
|
136 |
+
"""
|
137 |
+
|
138 |
+
def __init__(self, sampler):
|
139 |
+
self.sampler = sampler
|
140 |
+
|
141 |
+
def __iter__(self):
|
142 |
+
while True:
|
143 |
+
yield from iter(self.sampler)
|
144 |
+
|
145 |
+
|
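The InfiniteDataLoader/_RepeatSampler pair above keeps worker processes alive across epochs instead of respawning them every time __iter__ is called, which is why training loops built on it iterate it once per epoch like a normal DataLoader. A hypothetical usage sketch; `dataset` stands for any LoadImagesAndLabels instance and is not defined here:

train_loader = InfiniteDataLoader(dataset,                 # hypothetical LoadImagesAndLabels instance
                                  batch_size=16,
                                  num_workers=4,
                                  pin_memory=True,
                                  collate_fn=LoadImagesAndLabels.collate_fn)
for epoch in range(3):
    for imgs, targets, paths, shapes in train_loader:      # the same workers are reused every epoch
        pass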
146 |
+
class LoadImages: # for inference
|
147 |
+
def __init__(self, path, img_size=640, auto_size=32):
|
148 |
+
p = str(Path(path)) # os-agnostic
|
149 |
+
p = os.path.abspath(p) # absolute path
|
150 |
+
if '*' in p:
|
151 |
+
files = sorted(glob.glob(p, recursive=True)) # glob
|
152 |
+
elif os.path.isdir(p):
|
153 |
+
files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
|
154 |
+
elif os.path.isfile(p):
|
155 |
+
files = [p] # files
|
156 |
+
else:
|
157 |
+
raise Exception('ERROR: %s does not exist' % p)
|
158 |
+
|
159 |
+
images = [x for x in files if x.split('.')[-1].lower() in img_formats]
|
160 |
+
videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
|
161 |
+
ni, nv = len(images), len(videos)
|
162 |
+
|
163 |
+
self.img_size = img_size
|
164 |
+
self.auto_size = auto_size
|
165 |
+
self.files = images + videos
|
166 |
+
self.nf = ni + nv # number of files
|
167 |
+
self.video_flag = [False] * ni + [True] * nv
|
168 |
+
self.mode = 'images'
|
169 |
+
if any(videos):
|
170 |
+
self.new_video(videos[0]) # new video
|
171 |
+
else:
|
172 |
+
self.cap = None
|
173 |
+
assert self.nf > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
|
174 |
+
(p, img_formats, vid_formats)
|
175 |
+
|
176 |
+
def __iter__(self):
|
177 |
+
self.count = 0
|
178 |
+
return self
|
179 |
+
|
180 |
+
def __next__(self):
|
181 |
+
if self.count == self.nf:
|
182 |
+
raise StopIteration
|
183 |
+
path = self.files[self.count]
|
184 |
+
|
185 |
+
if self.video_flag[self.count]:
|
186 |
+
# Read video
|
187 |
+
self.mode = 'video'
|
188 |
+
ret_val, img0 = self.cap.read()
|
189 |
+
if not ret_val:
|
190 |
+
self.count += 1
|
191 |
+
self.cap.release()
|
192 |
+
if self.count == self.nf: # last video
|
193 |
+
raise StopIteration
|
194 |
+
else:
|
195 |
+
path = self.files[self.count]
|
196 |
+
self.new_video(path)
|
197 |
+
ret_val, img0 = self.cap.read()
|
198 |
+
|
199 |
+
self.frame += 1
|
200 |
+
print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='')
|
201 |
+
|
202 |
+
else:
|
203 |
+
# Read image
|
204 |
+
self.count += 1
|
205 |
+
img0 = cv2.imread(path) # BGR
|
206 |
+
assert img0 is not None, 'Image Not Found ' + path
|
207 |
+
print('image %g/%g %s: ' % (self.count, self.nf, path), end='')
|
208 |
+
|
209 |
+
# Padded resize
|
210 |
+
img = letterbox(img0, new_shape=self.img_size, auto_size=self.auto_size)[0]
|
211 |
+
|
212 |
+
# Convert
|
213 |
+
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
|
214 |
+
img = np.ascontiguousarray(img)
|
215 |
+
|
216 |
+
return path, img, img0, self.cap
|
217 |
+
|
218 |
+
def new_video(self, path):
|
219 |
+
self.frame = 0
|
220 |
+
self.cap = cv2.VideoCapture(path)
|
221 |
+
self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
222 |
+
|
223 |
+
def __len__(self):
|
224 |
+
return self.nf # number of files
|
225 |
+
|
226 |
+
|
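A hypothetical inference loop over a folder of images or videos using the LoadImages iterator above; the path is a placeholder and the detector call is left as a comment:

dataset = LoadImages('path/to/images_or_videos', img_size=640, auto_size=32)   # placeholder path
for path, img, img0, cap in dataset:
    img = torch.from_numpy(img).float() / 255.0   # already letterboxed and converted to CHW RGB above
    if img.ndimension() == 3:
        img = img.unsqueeze(0)                    # add batch dimension
    # detections = model(img)                     # feed your detector here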
227 |
+
class LoadWebcam: # for inference
|
228 |
+
def __init__(self, pipe='0', img_size=640):
|
229 |
+
self.img_size = img_size
|
230 |
+
|
231 |
+
if pipe.isnumeric():
|
232 |
+
pipe = eval(pipe) # local camera
|
233 |
+
# pipe = 'rtsp://192.168.1.64/1' # IP camera
|
234 |
+
# pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login
|
235 |
+
# pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera
|
236 |
+
|
237 |
+
self.pipe = pipe
|
238 |
+
self.cap = cv2.VideoCapture(pipe) # video capture object
|
239 |
+
self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size
|
240 |
+
|
241 |
+
def __iter__(self):
|
242 |
+
self.count = -1
|
243 |
+
return self
|
244 |
+
|
245 |
+
def __next__(self):
|
246 |
+
self.count += 1
|
247 |
+
if cv2.waitKey(1) == ord('q'): # q to quit
|
248 |
+
self.cap.release()
|
249 |
+
cv2.destroyAllWindows()
|
250 |
+
raise StopIteration
|
251 |
+
|
252 |
+
# Read frame
|
253 |
+
if self.pipe == 0: # local camera
|
254 |
+
ret_val, img0 = self.cap.read()
|
255 |
+
img0 = cv2.flip(img0, 1) # flip left-right
|
256 |
+
else: # IP camera
|
257 |
+
n = 0
|
258 |
+
while True:
|
259 |
+
n += 1
|
260 |
+
self.cap.grab()
|
261 |
+
if n % 30 == 0: # skip frames
|
262 |
+
ret_val, img0 = self.cap.retrieve()
|
263 |
+
if ret_val:
|
264 |
+
break
|
265 |
+
|
266 |
+
# Print
|
267 |
+
assert ret_val, 'Camera Error %s' % self.pipe
|
268 |
+
img_path = 'webcam.jpg'
|
269 |
+
print('webcam %g: ' % self.count, end='')
|
270 |
+
|
271 |
+
# Padded resize
|
272 |
+
img = letterbox(img0, new_shape=self.img_size)[0]
|
273 |
+
|
274 |
+
# Convert
|
275 |
+
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
|
276 |
+
img = np.ascontiguousarray(img)
|
277 |
+
|
278 |
+
return img_path, img, img0, None
|
279 |
+
|
280 |
+
def __len__(self):
|
281 |
+
return 0
|
282 |
+
|
283 |
+
|
284 |
+
class LoadStreams: # multiple IP or RTSP cameras
|
285 |
+
def __init__(self, sources='streams.txt', img_size=640):
|
286 |
+
self.mode = 'images'
|
287 |
+
self.img_size = img_size
|
288 |
+
|
289 |
+
if os.path.isfile(sources):
|
290 |
+
with open(sources, 'r') as f:
|
291 |
+
sources = [x.strip() for x in f.read().splitlines() if len(x.strip())]
|
292 |
+
else:
|
293 |
+
sources = [sources]
|
294 |
+
|
295 |
+
n = len(sources)
|
296 |
+
self.imgs = [None] * n
|
297 |
+
self.sources = sources
|
298 |
+
for i, s in enumerate(sources):
|
299 |
+
# Start the thread to read frames from the video stream
|
300 |
+
print('%g/%g: %s... ' % (i + 1, n, s), end='')
|
301 |
+
cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s)
|
302 |
+
assert cap.isOpened(), 'Failed to open %s' % s
|
303 |
+
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
304 |
+
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
305 |
+
fps = cap.get(cv2.CAP_PROP_FPS) % 100
|
306 |
+
_, self.imgs[i] = cap.read() # guarantee first frame
|
307 |
+
thread = Thread(target=self.update, args=([i, cap]), daemon=True)
|
308 |
+
print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
|
309 |
+
thread.start()
|
310 |
+
print('') # newline
|
311 |
+
|
312 |
+
# check for common shapes
|
313 |
+
s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
|
314 |
+
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
|
315 |
+
if not self.rect:
|
316 |
+
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
|
317 |
+
|
318 |
+
def update(self, index, cap):
|
319 |
+
# Read next stream frame in a daemon thread
|
320 |
+
n = 0
|
321 |
+
while cap.isOpened():
|
322 |
+
n += 1
|
323 |
+
# _, self.imgs[index] = cap.read()
|
324 |
+
cap.grab()
|
325 |
+
if n == 4: # read every 4th frame
|
326 |
+
_, self.imgs[index] = cap.retrieve()
|
327 |
+
n = 0
|
328 |
+
time.sleep(0.01) # wait time
|
329 |
+
|
330 |
+
def __iter__(self):
|
331 |
+
self.count = -1
|
332 |
+
return self
|
333 |
+
|
334 |
+
def __next__(self):
|
335 |
+
self.count += 1
|
336 |
+
img0 = self.imgs.copy()
|
337 |
+
if cv2.waitKey(1) == ord('q'): # q to quit
|
338 |
+
cv2.destroyAllWindows()
|
339 |
+
raise StopIteration
|
340 |
+
|
341 |
+
# Letterbox
|
342 |
+
img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
|
343 |
+
|
344 |
+
# Stack
|
345 |
+
img = np.stack(img, 0)
|
346 |
+
|
347 |
+
# Convert
|
348 |
+
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
|
349 |
+
img = np.ascontiguousarray(img)
|
350 |
+
|
351 |
+
return self.sources, img, img0, None
|
352 |
+
|
353 |
+
def __len__(self):
|
354 |
+
return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
|
355 |
+
|
356 |
+
|
357 |
+
class LoadImagesAndLabels(Dataset): # for training/testing
|
358 |
+
def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
|
359 |
+
cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1):
|
360 |
+
self.img_size = img_size
|
361 |
+
self.augment = augment
|
362 |
+
self.hyp = hyp
|
363 |
+
self.image_weights = image_weights
|
364 |
+
self.rect = False if image_weights else rect
|
365 |
+
self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
|
366 |
+
self.mosaic_border = [-img_size // 2, -img_size // 2]
|
367 |
+
self.stride = stride
|
368 |
+
|
369 |
+
def img2label_paths(img_paths):
|
370 |
+
# Define label paths as a function of image paths
|
371 |
+
sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
|
372 |
+
return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths]
|
373 |
+
|
374 |
+
try:
|
375 |
+
f = [] # image files
|
376 |
+
for p in path if isinstance(path, list) else [path]:
|
377 |
+
p = Path(p) # os-agnostic
|
378 |
+
if p.is_dir(): # dir
|
379 |
+
f += glob.glob(str(p / '**' / '*.*'), recursive=True)
|
380 |
+
elif p.is_file(): # file
|
381 |
+
with open(p, 'r') as t:
|
382 |
+
t = t.read().splitlines()
|
383 |
+
parent = str(p.parent) + os.sep
|
384 |
+
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
|
385 |
+
else:
|
386 |
+
raise Exception('%s does not exist' % p)
|
387 |
+
self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
|
388 |
+
assert self.img_files, 'No images found'
|
389 |
+
except Exception as e:
|
390 |
+
raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
|
391 |
+
|
392 |
+
# Check cache
|
393 |
+
self.label_files = img2label_paths(self.img_files) # labels
|
394 |
+
cache_path = str(Path(self.label_files[0]).parent) + '.cache3' # cached labels
|
395 |
+
if os.path.isfile(cache_path):
|
396 |
+
cache = torch.load(cache_path) # load
|
397 |
+
if cache['hash'] != get_hash(self.label_files + self.img_files): # dataset changed
|
398 |
+
cache = self.cache_labels(cache_path) # re-cache
|
399 |
+
else:
|
400 |
+
cache = self.cache_labels(cache_path) # cache
|
401 |
+
|
402 |
+
# Read cache
|
403 |
+
cache.pop('hash') # remove hash
|
404 |
+
labels, shapes = zip(*cache.values())
|
405 |
+
self.labels = list(labels)
|
406 |
+
self.shapes = np.array(shapes, dtype=np.float64)
|
407 |
+
self.img_files = list(cache.keys()) # update
|
408 |
+
self.label_files = img2label_paths(cache.keys()) # update
|
409 |
+
|
410 |
+
n = len(shapes) # number of images
|
411 |
+
bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
|
412 |
+
nb = bi[-1] + 1 # number of batches
|
413 |
+
self.batch = bi # batch index of image
|
414 |
+
self.n = n
|
415 |
+
|
416 |
+
# Rectangular Training
|
417 |
+
if self.rect:
|
418 |
+
# Sort by aspect ratio
|
419 |
+
s = self.shapes # wh
|
420 |
+
ar = s[:, 1] / s[:, 0] # aspect ratio
|
421 |
+
irect = ar.argsort()
|
422 |
+
self.img_files = [self.img_files[i] for i in irect]
|
423 |
+
self.label_files = [self.label_files[i] for i in irect]
|
424 |
+
self.labels = [self.labels[i] for i in irect]
|
425 |
+
self.shapes = s[irect] # wh
|
426 |
+
ar = ar[irect]
|
427 |
+
|
428 |
+
# Set training image shapes
|
429 |
+
shapes = [[1, 1]] * nb
|
430 |
+
for i in range(nb):
|
431 |
+
ari = ar[bi == i]
|
432 |
+
mini, maxi = ari.min(), ari.max()
|
433 |
+
if maxi < 1:
|
434 |
+
shapes[i] = [maxi, 1]
|
435 |
+
elif mini > 1:
|
436 |
+
shapes[i] = [1, 1 / mini]
|
437 |
+
|
438 |
+
self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
|
439 |
+
|
440 |
+
# Check labels
|
441 |
+
create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
|
442 |
+
nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate
|
443 |
+
pbar = enumerate(self.label_files)
|
444 |
+
if rank in [-1, 0]:
|
445 |
+
pbar = tqdm(pbar)
|
446 |
+
for i, file in pbar:
|
447 |
+
l = self.labels[i] # label
|
448 |
+
if l is not None and l.shape[0]:
|
449 |
+
assert l.shape[1] == 5, '> 5 label columns: %s' % file
|
450 |
+
assert (l >= 0).all(), 'negative labels: %s' % file
|
451 |
+
assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
|
452 |
+
if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows
|
453 |
+
nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows
|
454 |
+
if single_cls:
|
455 |
+
l[:, 0] = 0 # force dataset into single-class mode
|
456 |
+
self.labels[i] = l
|
457 |
+
nf += 1 # file found
|
458 |
+
|
459 |
+
# Create subdataset (a smaller dataset)
|
460 |
+
if create_datasubset and ns < 1E4:
|
461 |
+
if ns == 0:
|
462 |
+
create_folder(path='./datasubset')
|
463 |
+
os.makedirs('./datasubset/images')
|
464 |
+
exclude_classes = 43
|
465 |
+
if exclude_classes not in l[:, 0]:
|
466 |
+
ns += 1
|
467 |
+
# shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image
|
468 |
+
with open('./datasubset/images.txt', 'a') as f:
|
469 |
+
f.write(self.img_files[i] + '\n')
|
470 |
+
|
471 |
+
# Extract object detection boxes for a second stage classifier
|
472 |
+
if extract_bounding_boxes:
|
473 |
+
p = Path(self.img_files[i])
|
474 |
+
img = cv2.imread(str(p))
|
475 |
+
h, w = img.shape[:2]
|
476 |
+
for j, x in enumerate(l):
|
477 |
+
f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
|
478 |
+
if not os.path.exists(Path(f).parent):
|
479 |
+
os.makedirs(Path(f).parent) # make new output folder
|
480 |
+
|
481 |
+
b = x[1:] * [w, h, w, h] # box
|
482 |
+
b[2:] = b[2:].max() # rectangle to square
|
483 |
+
b[2:] = b[2:] * 1.3 + 30 # pad
|
484 |
+
b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
|
485 |
+
|
486 |
+
b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
|
487 |
+
b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
|
488 |
+
assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
|
489 |
+
else:
|
490 |
+
ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty
|
491 |
+
# os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove
|
492 |
+
|
493 |
+
if rank in [-1, 0]:
|
494 |
+
pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
|
495 |
+
cache_path, nf, nm, ne, nd, n)
|
496 |
+
if nf == 0:
|
497 |
+
s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
|
498 |
+
print(s)
|
499 |
+
assert not augment, '%s. Can not train without labels.' % s
|
500 |
+
|
501 |
+
# Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
|
502 |
+
self.imgs = [None] * n
|
503 |
+
if cache_images:
|
504 |
+
gb = 0 # Gigabytes of cached images
|
505 |
+
self.img_hw0, self.img_hw = [None] * n, [None] * n
|
506 |
+
results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads
|
507 |
+
pbar = tqdm(enumerate(results), total=n)
|
508 |
+
for i, x in pbar:
|
509 |
+
self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
|
510 |
+
gb += self.imgs[i].nbytes
|
511 |
+
pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
|
512 |
+
|
513 |
+
def cache_labels(self, path='labels.cache3'):
|
514 |
+
# Cache dataset labels, check images and read shapes
|
515 |
+
x = {} # dict
|
516 |
+
pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
|
517 |
+
for (img, label) in pbar:
|
518 |
+
try:
|
519 |
+
l = []
|
520 |
+
im = Image.open(img)
|
521 |
+
im.verify() # PIL verify
|
522 |
+
shape = exif_size(im) # image size
|
523 |
+
assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels'
|
524 |
+
if os.path.isfile(label):
|
525 |
+
with open(label, 'r') as f:
|
526 |
+
l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels
|
527 |
+
if len(l) == 0:
|
528 |
+
l = np.zeros((0, 5), dtype=np.float32)
|
529 |
+
x[img] = [l, shape]
|
530 |
+
except Exception as e:
|
531 |
+
print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e))
|
532 |
+
|
533 |
+
x['hash'] = get_hash(self.label_files + self.img_files)
|
534 |
+
torch.save(x, path) # save for next time
|
535 |
+
return x
|
536 |
+
|
537 |
+
def __len__(self):
|
538 |
+
return len(self.img_files)
|
539 |
+
|
540 |
+
# def __iter__(self):
|
541 |
+
# self.count = -1
|
542 |
+
# print('ran dataset iter')
|
543 |
+
# #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
|
544 |
+
# return self
|
545 |
+
|
546 |
+
def __getitem__(self, index):
|
547 |
+
if self.image_weights:
|
548 |
+
index = self.indices[index]
|
549 |
+
|
550 |
+
hyp = self.hyp
|
551 |
+
mosaic = self.mosaic and random.random() < hyp['mosaic']
|
552 |
+
if mosaic:
|
553 |
+
# Load mosaic
|
554 |
+
img, labels = load_mosaic(self, index)
|
555 |
+
#img, labels = load_mosaic9(self, index)
|
556 |
+
shapes = None
|
557 |
+
|
558 |
+
# MixUp https://arxiv.org/pdf/1710.09412.pdf
|
559 |
+
if random.random() < hyp['mixup']:
|
560 |
+
img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1))
|
561 |
+
#img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1))
|
562 |
+
r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0
|
563 |
+
img = (img * r + img2 * (1 - r)).astype(np.uint8)
|
564 |
+
labels = np.concatenate((labels, labels2), 0)
|
565 |
+
|
566 |
+
else:
|
567 |
+
# Load image
|
568 |
+
img, (h0, w0), (h, w) = load_image(self, index)
|
569 |
+
|
570 |
+
# Letterbox
|
571 |
+
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
|
572 |
+
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
|
573 |
+
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
|
574 |
+
|
575 |
+
# Load labels
|
576 |
+
labels = []
|
577 |
+
x = self.labels[index]
|
578 |
+
if x.size > 0:
|
579 |
+
# Normalized xywh to pixel xyxy format
|
580 |
+
labels = x.copy()
|
581 |
+
labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
|
582 |
+
labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
|
583 |
+
labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
|
584 |
+
labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
|
585 |
+
|
586 |
+
if self.augment:
|
587 |
+
# Augment imagespace
|
588 |
+
if not mosaic:
|
589 |
+
img, labels = random_perspective(img, labels,
|
590 |
+
degrees=hyp['degrees'],
|
591 |
+
translate=hyp['translate'],
|
592 |
+
scale=hyp['scale'],
|
593 |
+
shear=hyp['shear'],
|
594 |
+
perspective=hyp['perspective'])
|
595 |
+
|
596 |
+
# Augment colorspace
|
597 |
+
augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
|
598 |
+
|
599 |
+
# Apply cutouts
|
600 |
+
# if random.random() < 0.9:
|
601 |
+
# labels = cutout(img, labels)
|
602 |
+
|
603 |
+
nL = len(labels) # number of labels
|
604 |
+
if nL:
|
605 |
+
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh
|
606 |
+
labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1
|
607 |
+
labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1
|
608 |
+
|
609 |
+
if self.augment:
|
610 |
+
# flip up-down
|
611 |
+
if random.random() < hyp['flipud']:
|
612 |
+
img = np.flipud(img)
|
613 |
+
if nL:
|
614 |
+
labels[:, 2] = 1 - labels[:, 2]
|
615 |
+
|
616 |
+
# flip left-right
|
617 |
+
if random.random() < hyp['fliplr']:
|
618 |
+
img = np.fliplr(img)
|
619 |
+
if nL:
|
620 |
+
labels[:, 1] = 1 - labels[:, 1]
|
621 |
+
|
622 |
+
labels_out = torch.zeros((nL, 6))
|
623 |
+
if nL:
|
624 |
+
labels_out[:, 1:] = torch.from_numpy(labels)
|
625 |
+
|
626 |
+
# Convert
|
627 |
+
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
|
628 |
+
img = np.ascontiguousarray(img)
|
629 |
+
|
630 |
+
return torch.from_numpy(img), labels_out, self.img_files[index], shapes
|
631 |
+
|
632 |
+
@staticmethod
|
633 |
+
def collate_fn(batch):
|
634 |
+
img, label, path, shapes = zip(*batch) # transposed
|
635 |
+
for i, l in enumerate(label):
|
636 |
+
l[:, 0] = i # add target image index for build_targets()
|
637 |
+
return torch.stack(img, 0), torch.cat(label, 0), path, shapes
|
638 |
+
|
639 |
+
|
640 |
+
class LoadImagesAndLabels9(Dataset): # for training/testing
|
641 |
+
def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
|
642 |
+
cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1):
|
643 |
+
self.img_size = img_size
|
644 |
+
self.augment = augment
|
645 |
+
self.hyp = hyp
|
646 |
+
self.image_weights = image_weights
|
647 |
+
self.rect = False if image_weights else rect
|
648 |
+
self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
|
649 |
+
self.mosaic_border = [-img_size // 2, -img_size // 2]
|
650 |
+
self.stride = stride
|
651 |
+
|
652 |
+
def img2label_paths(img_paths):
|
653 |
+
# Define label paths as a function of image paths
|
654 |
+
sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
|
655 |
+
return [x.replace(sa, sb, 1).replace(x.split('.')[-1], 'txt') for x in img_paths]
|
656 |
+
|
657 |
+
try:
|
658 |
+
f = [] # image files
|
659 |
+
for p in path if isinstance(path, list) else [path]:
|
660 |
+
p = Path(p) # os-agnostic
|
661 |
+
if p.is_dir(): # dir
|
662 |
+
f += glob.glob(str(p / '**' / '*.*'), recursive=True)
|
663 |
+
elif p.is_file(): # file
|
664 |
+
with open(p, 'r') as t:
|
665 |
+
t = t.read().splitlines()
|
666 |
+
parent = str(p.parent) + os.sep
|
667 |
+
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
|
668 |
+
else:
|
669 |
+
raise Exception('%s does not exist' % p)
|
670 |
+
self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
|
671 |
+
assert self.img_files, 'No images found'
|
672 |
+
except Exception as e:
|
673 |
+
raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
|
674 |
+
|
675 |
+
# Check cache
|
676 |
+
self.label_files = img2label_paths(self.img_files) # labels
|
677 |
+
cache_path = str(Path(self.label_files[0]).parent) + '.cache3' # cached labels
|
678 |
+
if os.path.isfile(cache_path):
|
679 |
+
cache = torch.load(cache_path) # load
|
680 |
+
if cache['hash'] != get_hash(self.label_files + self.img_files): # dataset changed
|
681 |
+
cache = self.cache_labels(cache_path) # re-cache
|
682 |
+
else:
|
683 |
+
cache = self.cache_labels(cache_path) # cache
|
684 |
+
|
685 |
+
# Read cache
|
686 |
+
cache.pop('hash') # remove hash
|
687 |
+
labels, shapes = zip(*cache.values())
|
688 |
+
self.labels = list(labels)
|
689 |
+
self.shapes = np.array(shapes, dtype=np.float64)
|
690 |
+
self.img_files = list(cache.keys()) # update
|
691 |
+
self.label_files = img2label_paths(cache.keys()) # update
|
692 |
+
|
693 |
+
n = len(shapes) # number of images
|
694 |
+
bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index
|
695 |
+
nb = bi[-1] + 1 # number of batches
|
696 |
+
self.batch = bi # batch index of image
|
697 |
+
self.n = n
|
698 |
+
|
699 |
+
# Rectangular Training
|
700 |
+
if self.rect:
|
701 |
+
# Sort by aspect ratio
|
702 |
+
s = self.shapes # wh
|
703 |
+
ar = s[:, 1] / s[:, 0] # aspect ratio
|
704 |
+
irect = ar.argsort()
|
705 |
+
self.img_files = [self.img_files[i] for i in irect]
|
706 |
+
self.label_files = [self.label_files[i] for i in irect]
|
707 |
+
self.labels = [self.labels[i] for i in irect]
|
708 |
+
self.shapes = s[irect] # wh
|
709 |
+
ar = ar[irect]
|
710 |
+
|
711 |
+
# Set training image shapes
|
712 |
+
shapes = [[1, 1]] * nb
|
713 |
+
for i in range(nb):
|
714 |
+
ari = ar[bi == i]
|
715 |
+
mini, maxi = ari.min(), ari.max()
|
716 |
+
if maxi < 1:
|
717 |
+
shapes[i] = [maxi, 1]
|
718 |
+
elif mini > 1:
|
719 |
+
shapes[i] = [1, 1 / mini]
|
720 |
+
|
721 |
+
self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride
|
722 |
+
|
723 |
+
# Check labels
|
724 |
+
create_datasubset, extract_bounding_boxes, labels_loaded = False, False, False
|
725 |
+
nm, nf, ne, ns, nd = 0, 0, 0, 0, 0 # number missing, found, empty, datasubset, duplicate
|
726 |
+
pbar = enumerate(self.label_files)
|
727 |
+
if rank in [-1, 0]:
|
728 |
+
pbar = tqdm(pbar)
|
729 |
+
for i, file in pbar:
|
730 |
+
l = self.labels[i] # label
|
731 |
+
if l is not None and l.shape[0]:
|
732 |
+
assert l.shape[1] == 5, '> 5 label columns: %s' % file
|
733 |
+
assert (l >= 0).all(), 'negative labels: %s' % file
|
734 |
+
assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
|
735 |
+
if np.unique(l, axis=0).shape[0] < l.shape[0]: # duplicate rows
|
736 |
+
nd += 1 # print('WARNING: duplicate rows in %s' % self.label_files[i]) # duplicate rows
|
737 |
+
if single_cls:
|
738 |
+
l[:, 0] = 0 # force dataset into single-class mode
|
739 |
+
self.labels[i] = l
|
740 |
+
nf += 1 # file found
|
741 |
+
|
742 |
+
# Create subdataset (a smaller dataset)
|
743 |
+
if create_datasubset and ns < 1E4:
|
744 |
+
if ns == 0:
|
745 |
+
create_folder(path='./datasubset')
|
746 |
+
os.makedirs('./datasubset/images')
|
747 |
+
exclude_classes = 43
|
748 |
+
if exclude_classes not in l[:, 0]:
|
749 |
+
ns += 1
|
750 |
+
# shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image
|
751 |
+
with open('./datasubset/images.txt', 'a') as f:
|
752 |
+
f.write(self.img_files[i] + '\n')
|
753 |
+
|
754 |
+
# Extract object detection boxes for a second stage classifier
|
755 |
+
if extract_bounding_boxes:
|
756 |
+
p = Path(self.img_files[i])
|
757 |
+
img = cv2.imread(str(p))
|
758 |
+
h, w = img.shape[:2]
|
759 |
+
for j, x in enumerate(l):
|
760 |
+
f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
|
761 |
+
if not os.path.exists(Path(f).parent):
|
762 |
+
os.makedirs(Path(f).parent) # make new output folder
|
763 |
+
|
764 |
+
b = x[1:] * [w, h, w, h] # box
|
765 |
+
b[2:] = b[2:].max() # rectangle to square
|
766 |
+
b[2:] = b[2:] * 1.3 + 30 # pad
|
767 |
+
b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)
|
768 |
+
|
769 |
+
b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
|
770 |
+
b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
|
771 |
+
assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
|
772 |
+
else:
|
773 |
+
ne += 1 # print('empty labels for image %s' % self.img_files[i]) # file empty
|
774 |
+
# os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i])) # remove
|
775 |
+
|
776 |
+
if rank in [-1, 0]:
|
777 |
+
pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
|
778 |
+
cache_path, nf, nm, ne, nd, n)
|
779 |
+
if nf == 0:
|
780 |
+
s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
|
781 |
+
print(s)
|
782 |
+
assert not augment, '%s. Can not train without labels.' % s
|
783 |
+
|
784 |
+
# Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
|
785 |
+
self.imgs = [None] * n
|
786 |
+
if cache_images:
|
787 |
+
gb = 0 # Gigabytes of cached images
|
788 |
+
self.img_hw0, self.img_hw = [None] * n, [None] * n
|
789 |
+
results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads
|
790 |
+
pbar = tqdm(enumerate(results), total=n)
|
791 |
+
for i, x in pbar:
|
792 |
+
self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
|
793 |
+
gb += self.imgs[i].nbytes
|
794 |
+
pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
|
795 |
+
|
796 |
+
def cache_labels(self, path='labels.cache3'):
|
797 |
+
# Cache dataset labels, check images and read shapes
|
798 |
+
x = {} # dict
|
799 |
+
pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
|
800 |
+
for (img, label) in pbar:
|
801 |
+
try:
|
802 |
+
l = []
|
803 |
+
im = Image.open(img)
|
804 |
+
im.verify() # PIL verify
|
805 |
+
shape = exif_size(im) # image size
|
806 |
+
assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels'
|
807 |
+
if os.path.isfile(label):
|
808 |
+
with open(label, 'r') as f:
|
809 |
+
l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) # labels
|
810 |
+
if len(l) == 0:
|
811 |
+
l = np.zeros((0, 5), dtype=np.float32)
|
812 |
+
x[img] = [l, shape]
|
813 |
+
except Exception as e:
|
814 |
+
print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e))
|
815 |
+
|
816 |
+
x['hash'] = get_hash(self.label_files + self.img_files)
|
817 |
+
torch.save(x, path) # save for next time
|
818 |
+
return x
|
819 |
+
|
820 |
+
def __len__(self):
|
821 |
+
return len(self.img_files)
|
822 |
+
|
823 |
+
# def __iter__(self):
|
824 |
+
# self.count = -1
|
825 |
+
# print('ran dataset iter')
|
826 |
+
# #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
|
827 |
+
# return self
|
828 |
+
|
829 |
+
def __getitem__(self, index):
|
830 |
+
if self.image_weights:
|
831 |
+
index = self.indices[index]
|
832 |
+
|
833 |
+
hyp = self.hyp
|
834 |
+
mosaic = self.mosaic and random.random() < hyp['mosaic']
|
835 |
+
if mosaic:
|
836 |
+
# Load mosaic
|
837 |
+
#img, labels = load_mosaic(self, index)
|
838 |
+
img, labels = load_mosaic9(self, index)
|
839 |
+
shapes = None
|
840 |
+
|
841 |
+
# MixUp https://arxiv.org/pdf/1710.09412.pdf
|
842 |
+
if random.random() < hyp['mixup']:
|
843 |
+
#img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1))
|
844 |
+
img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1))
|
845 |
+
r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0
|
846 |
+
img = (img * r + img2 * (1 - r)).astype(np.uint8)
|
847 |
+
labels = np.concatenate((labels, labels2), 0)
|
848 |
+
|
849 |
+
else:
|
850 |
+
# Load image
|
851 |
+
img, (h0, w0), (h, w) = load_image(self, index)
|
852 |
+
|
853 |
+
# Letterbox
|
854 |
+
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
|
855 |
+
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
|
856 |
+
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
|
857 |
+
|
858 |
+
# Load labels
|
859 |
+
labels = []
|
860 |
+
x = self.labels[index]
|
861 |
+
if x.size > 0:
|
862 |
+
# Normalized xywh to pixel xyxy format
|
863 |
+
labels = x.copy()
|
864 |
+
labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
|
865 |
+
labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
|
866 |
+
labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
|
867 |
+
labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
|
868 |
+
|
869 |
+
if self.augment:
|
870 |
+
# Augment imagespace
|
871 |
+
if not mosaic:
|
872 |
+
img, labels = random_perspective(img, labels,
|
873 |
+
degrees=hyp['degrees'],
|
874 |
+
translate=hyp['translate'],
|
875 |
+
scale=hyp['scale'],
|
876 |
+
shear=hyp['shear'],
|
877 |
+
perspective=hyp['perspective'])
|
878 |
+
|
879 |
+
# Augment colorspace
|
880 |
+
augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
|
881 |
+
|
882 |
+
# Apply cutouts
|
883 |
+
# if random.random() < 0.9:
|
884 |
+
# labels = cutout(img, labels)
|
885 |
+
|
886 |
+
nL = len(labels) # number of labels
|
887 |
+
if nL:
|
888 |
+
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh
|
889 |
+
labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1
|
890 |
+
labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1
|
891 |
+
|
892 |
+
if self.augment:
|
893 |
+
# flip up-down
|
894 |
+
if random.random() < hyp['flipud']:
|
895 |
+
img = np.flipud(img)
|
896 |
+
if nL:
|
897 |
+
labels[:, 2] = 1 - labels[:, 2]
|
898 |
+
|
899 |
+
# flip left-right
|
900 |
+
if random.random() < hyp['fliplr']:
|
901 |
+
img = np.fliplr(img)
|
902 |
+
if nL:
|
903 |
+
labels[:, 1] = 1 - labels[:, 1]
|
904 |
+
|
905 |
+
labels_out = torch.zeros((nL, 6))
|
906 |
+
if nL:
|
907 |
+
labels_out[:, 1:] = torch.from_numpy(labels)
|
908 |
+
|
909 |
+
# Convert
|
910 |
+
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
|
911 |
+
img = np.ascontiguousarray(img)
|
912 |
+
|
913 |
+
return torch.from_numpy(img), labels_out, self.img_files[index], shapes
|
914 |
+
|
915 |
+
@staticmethod
|
916 |
+
def collate_fn(batch):
|
917 |
+
img, label, path, shapes = zip(*batch) # transposed
|
918 |
+
for i, l in enumerate(label):
|
919 |
+
l[:, 0] = i # add target image index for build_targets()
|
920 |
+
return torch.stack(img, 0), torch.cat(label, 0), path, shapes
|
921 |
+
|
922 |
+
|
923 |
+
# Ancillary functions --------------------------------------------------------------------------------------------------
|
924 |
+
def load_image(self, index):
|
925 |
+
# loads 1 image from dataset, returns img, original hw, resized hw
|
926 |
+
img = self.imgs[index]
|
927 |
+
if img is None: # not cached
|
928 |
+
path = self.img_files[index]
|
929 |
+
img = cv2.imread(path) # BGR
|
930 |
+
assert img is not None, 'Image Not Found ' + path
|
931 |
+
h0, w0 = img.shape[:2] # orig hw
|
932 |
+
r = self.img_size / max(h0, w0) # resize image to img_size
|
933 |
+
if r != 1: # always resize down, only resize up if training with augmentation
|
934 |
+
interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
|
935 |
+
img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
|
936 |
+
return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized
|
937 |
+
else:
|
938 |
+
return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
|
939 |
+
|
940 |
+
|
941 |
+
def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
|
942 |
+
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
|
943 |
+
hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
|
944 |
+
dtype = img.dtype # uint8
|
945 |
+
|
946 |
+
x = np.arange(0, 256, dtype=np.int16)
|
947 |
+
lut_hue = ((x * r[0]) % 180).astype(dtype)
|
948 |
+
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
|
949 |
+
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
|
950 |
+
|
951 |
+
img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
|
952 |
+
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
|
953 |
+
|
954 |
+
# Histogram equalization
|
955 |
+
# if random.random() < 0.2:
|
956 |
+
# for i in range(3):
|
957 |
+
# img[:, :, i] = cv2.equalizeHist(img[:, :, i])
|
958 |
+
|
959 |
+
|
960 |
+
def load_mosaic(self, index):
|
961 |
+
# loads images in a mosaic
|
962 |
+
|
963 |
+
labels4 = []
|
964 |
+
s = self.img_size
|
965 |
+
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
|
966 |
+
indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices
|
967 |
+
for i, index in enumerate(indices):
|
968 |
+
# Load image
|
969 |
+
img, _, (h, w) = load_image(self, index)
|
970 |
+
|
971 |
+
# place img in img4
|
972 |
+
if i == 0: # top left
|
973 |
+
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
|
974 |
+
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
|
975 |
+
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
|
976 |
+
elif i == 1: # top right
|
977 |
+
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
|
978 |
+
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
|
979 |
+
elif i == 2: # bottom left
|
980 |
+
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
|
981 |
+
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
|
982 |
+
elif i == 3: # bottom right
|
983 |
+
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
|
984 |
+
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
|
985 |
+
|
986 |
+
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
|
987 |
+
padw = x1a - x1b
|
988 |
+
padh = y1a - y1b
|
989 |
+
|
990 |
+
# Labels
|
991 |
+
x = self.labels[index]
|
992 |
+
labels = x.copy()
|
993 |
+
if x.size > 0: # Normalized xywh to pixel xyxy format
|
994 |
+
labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
|
995 |
+
labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
|
996 |
+
labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
|
997 |
+
labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
|
998 |
+
labels4.append(labels)
|
999 |
+
|
1000 |
+
# Concat/clip labels
|
1001 |
+
if len(labels4):
|
1002 |
+
labels4 = np.concatenate(labels4, 0)
|
1003 |
+
np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_perspective
|
1004 |
+
# img4, labels4 = replicate(img4, labels4) # replicate
|
1005 |
+
|
1006 |
+
# Augment
|
1007 |
+
img4, labels4 = random_perspective(img4, labels4,
|
1008 |
+
degrees=self.hyp['degrees'],
|
1009 |
+
translate=self.hyp['translate'],
|
1010 |
+
scale=self.hyp['scale'],
|
1011 |
+
shear=self.hyp['shear'],
|
1012 |
+
perspective=self.hyp['perspective'],
|
1013 |
+
border=self.mosaic_border) # border to remove
|
1014 |
+
|
1015 |
+
return img4, labels4
|
1016 |
+
|
1017 |
+
|
1018 |
+
def load_mosaic9(self, index):
|
1019 |
+
# loads images in a 9-mosaic
|
1020 |
+
|
1021 |
+
labels9 = []
|
1022 |
+
s = self.img_size
|
1023 |
+
indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(8)] # 8 additional image indices
|
1024 |
+
for i, index in enumerate(indices):
|
1025 |
+
# Load image
|
1026 |
+
img, _, (h, w) = load_image(self, index)
|
1027 |
+
|
1028 |
+
# place img in img9
|
1029 |
+
if i == 0: # center
|
1030 |
+
img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
|
1031 |
+
h0, w0 = h, w
|
1032 |
+
c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates
|
1033 |
+
elif i == 1: # top
|
1034 |
+
c = s, s - h, s + w, s
|
1035 |
+
elif i == 2: # top right
|
1036 |
+
c = s + wp, s - h, s + wp + w, s
|
1037 |
+
elif i == 3: # right
|
1038 |
+
c = s + w0, s, s + w0 + w, s + h
|
1039 |
+
elif i == 4: # bottom right
|
1040 |
+
c = s + w0, s + hp, s + w0 + w, s + hp + h
|
1041 |
+
elif i == 5: # bottom
|
1042 |
+
c = s + w0 - w, s + h0, s + w0, s + h0 + h
|
1043 |
+
elif i == 6: # bottom left
|
1044 |
+
c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
|
1045 |
+
elif i == 7: # left
|
1046 |
+
c = s - w, s + h0 - h, s, s + h0
|
1047 |
+
elif i == 8: # top left
|
1048 |
+
c = s - w, s + h0 - hp - h, s, s + h0 - hp
|
1049 |
+
|
1050 |
+
padx, pady = c[:2]
|
1051 |
+
x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords
|
1052 |
+
|
1053 |
+
# Labels
|
1054 |
+
x = self.labels[index]
|
1055 |
+
labels = x.copy()
|
1056 |
+
if x.size > 0: # Normalized xywh to pixel xyxy format
|
1057 |
+
labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padx
|
1058 |
+
labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + pady
|
1059 |
+
labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padx
|
1060 |
+
labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + pady
|
1061 |
+
labels9.append(labels)
|
1062 |
+
|
1063 |
+
# Image
|
1064 |
+
img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax]
|
1065 |
+
hp, wp = h, w # height, width previous
|
1066 |
+
|
1067 |
+
# Offset
|
1068 |
+
yc, xc = [int(random.uniform(0, s)) for x in self.mosaic_border] # mosaic center x, y
|
1069 |
+
img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]
|
1070 |
+
|
1071 |
+
# Concat/clip labels
|
1072 |
+
if len(labels9):
|
1073 |
+
labels9 = np.concatenate(labels9, 0)
|
1074 |
+
labels9[:, [1, 3]] -= xc
|
1075 |
+
labels9[:, [2, 4]] -= yc
|
1076 |
+
|
1077 |
+
np.clip(labels9[:, 1:], 0, 2 * s, out=labels9[:, 1:]) # use with random_perspective
|
1078 |
+
# img9, labels9 = replicate(img9, labels9) # replicate
|
1079 |
+
|
1080 |
+
# Augment
|
1081 |
+
img9, labels9 = random_perspective(img9, labels9,
|
1082 |
+
degrees=self.hyp['degrees'],
|
1083 |
+
translate=self.hyp['translate'],
|
1084 |
+
scale=self.hyp['scale'],
|
1085 |
+
shear=self.hyp['shear'],
|
1086 |
+
perspective=self.hyp['perspective'],
|
1087 |
+
border=self.mosaic_border) # border to remove
|
1088 |
+
|
1089 |
+
return img9, labels9
|
1090 |
+
|
1091 |
+
|
1092 |
+
def replicate(img, labels):
|
1093 |
+
# Replicate labels
|
1094 |
+
h, w = img.shape[:2]
|
1095 |
+
boxes = labels[:, 1:].astype(int)
|
1096 |
+
x1, y1, x2, y2 = boxes.T
|
1097 |
+
s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
|
1098 |
+
for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
|
1099 |
+
x1b, y1b, x2b, y2b = boxes[i]
|
1100 |
+
bh, bw = y2b - y1b, x2b - x1b
|
1101 |
+
yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
|
1102 |
+
x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
|
1103 |
+
img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
|
1104 |
+
labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
|
1105 |
+
|
1106 |
+
return img, labels
|
1107 |
+
|
1108 |
+
|
1109 |
+
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, auto_size=32):
|
1110 |
+
# Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
|
1111 |
+
shape = img.shape[:2] # current shape [height, width]
|
1112 |
+
if isinstance(new_shape, int):
|
1113 |
+
new_shape = (new_shape, new_shape)
|
1114 |
+
|
1115 |
+
# Scale ratio (new / old)
|
1116 |
+
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
|
1117 |
+
if not scaleup: # only scale down, do not scale up (for better test mAP)
|
1118 |
+
r = min(r, 1.0)
|
1119 |
+
|
1120 |
+
# Compute padding
|
1121 |
+
ratio = r, r # width, height ratios
|
1122 |
+
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
|
1123 |
+
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
|
1124 |
+
if auto: # minimum rectangle
|
1125 |
+
dw, dh = np.mod(dw, auto_size), np.mod(dh, auto_size) # wh padding
|
1126 |
+
elif scaleFill: # stretch
|
1127 |
+
dw, dh = 0.0, 0.0
|
1128 |
+
new_unpad = (new_shape[1], new_shape[0])
|
1129 |
+
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
|
1130 |
+
|
1131 |
+
dw /= 2 # divide padding into 2 sides
|
1132 |
+
dh /= 2
|
1133 |
+
|
1134 |
+
if shape[::-1] != new_unpad: # resize
|
1135 |
+
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
|
1136 |
+
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
|
1137 |
+
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
|
1138 |
+
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
|
1139 |
+
return img, ratio, (dw, dh)
|
1140 |
+
|
1141 |
+
|
1142 |
+
def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
|
1143 |
+
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
|
1144 |
+
# targets = [cls, xyxy]
|
1145 |
+
|
1146 |
+
height = img.shape[0] + border[0] * 2 # shape(h,w,c)
|
1147 |
+
width = img.shape[1] + border[1] * 2
|
1148 |
+
|
1149 |
+
# Center
|
1150 |
+
C = np.eye(3)
|
1151 |
+
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
|
1152 |
+
C[1, 2] = -img.shape[0] / 2 # y translation (pixels)
|
1153 |
+
|
1154 |
+
# Perspective
|
1155 |
+
P = np.eye(3)
|
1156 |
+
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
|
1157 |
+
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
|
1158 |
+
|
1159 |
+
# Rotation and Scale
|
1160 |
+
R = np.eye(3)
|
1161 |
+
a = random.uniform(-degrees, degrees)
|
1162 |
+
# a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
|
1163 |
+
s = random.uniform(1 - scale, 1 + scale)
|
1164 |
+
# s = 2 ** random.uniform(-scale, scale)
|
1165 |
+
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
|
1166 |
+
|
1167 |
+
# Shear
|
1168 |
+
S = np.eye(3)
|
1169 |
+
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
|
1170 |
+
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
|
1171 |
+
|
1172 |
+
# Translation
|
1173 |
+
T = np.eye(3)
|
1174 |
+
T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
|
1175 |
+
T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
|
1176 |
+
|
1177 |
+
# Combined rotation matrix
|
1178 |
+
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
|
1179 |
+
if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
|
1180 |
+
if perspective:
|
1181 |
+
img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
|
1182 |
+
else: # affine
|
1183 |
+
img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
|
1184 |
+
|
1185 |
+
# Visualize
|
1186 |
+
# import matplotlib.pyplot as plt
|
1187 |
+
# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
|
1188 |
+
# ax[0].imshow(img[:, :, ::-1]) # base
|
1189 |
+
# ax[1].imshow(img2[:, :, ::-1]) # warped
|
1190 |
+
|
1191 |
+
# Transform label coordinates
|
1192 |
+
n = len(targets)
|
1193 |
+
if n:
|
1194 |
+
# warp points
|
1195 |
+
xy = np.ones((n * 4, 3))
|
1196 |
+
xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
|
1197 |
+
xy = xy @ M.T # transform
|
1198 |
+
if perspective:
|
1199 |
+
xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale
|
1200 |
+
else: # affine
|
1201 |
+
xy = xy[:, :2].reshape(n, 8)
|
1202 |
+
|
1203 |
+
# create new boxes
|
1204 |
+
x = xy[:, [0, 2, 4, 6]]
|
1205 |
+
y = xy[:, [1, 3, 5, 7]]
|
1206 |
+
xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
|
1207 |
+
|
1208 |
+
# # apply angle-based reduction of bounding boxes
|
1209 |
+
# radians = a * math.pi / 180
|
1210 |
+
# reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
|
1211 |
+
# x = (xy[:, 2] + xy[:, 0]) / 2
|
1212 |
+
# y = (xy[:, 3] + xy[:, 1]) / 2
|
1213 |
+
# w = (xy[:, 2] - xy[:, 0]) * reduction
|
1214 |
+
# h = (xy[:, 3] - xy[:, 1]) * reduction
|
1215 |
+
# xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
|
1216 |
+
|
1217 |
+
# clip boxes
|
1218 |
+
xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
|
1219 |
+
xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
|
1220 |
+
|
1221 |
+
# filter candidates
|
1222 |
+
i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
|
1223 |
+
targets = targets[i]
|
1224 |
+
targets[:, 1:5] = xy[i]
|
1225 |
+
|
1226 |
+
return img, targets
|
1227 |
+
|
1228 |
+
|
1229 |
+
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1): # box1(4,n), box2(4,n)
|
1230 |
+
# Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
|
1231 |
+
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
|
1232 |
+
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
|
1233 |
+
ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio
|
1234 |
+
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr) # candidates
|
1235 |
+
|
1236 |
+
|
1237 |
+
def cutout(image, labels):
|
1238 |
+
# Applies image cutout augmentation https://arxiv.org/abs/1708.04552
|
1239 |
+
h, w = image.shape[:2]
|
1240 |
+
|
1241 |
+
def bbox_ioa(box1, box2):
|
1242 |
+
# Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
|
1243 |
+
box2 = box2.transpose()
|
1244 |
+
|
1245 |
+
# Get the coordinates of bounding boxes
|
1246 |
+
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
|
1247 |
+
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
|
1248 |
+
|
1249 |
+
# Intersection area
|
1250 |
+
inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
|
1251 |
+
(np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
|
1252 |
+
|
1253 |
+
# box2 area
|
1254 |
+
box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
|
1255 |
+
|
1256 |
+
# Intersection over box2 area
|
1257 |
+
return inter_area / box2_area
|
1258 |
+
|
1259 |
+
# create random masks
|
1260 |
+
scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
|
1261 |
+
for s in scales:
|
1262 |
+
mask_h = random.randint(1, int(h * s))
|
1263 |
+
mask_w = random.randint(1, int(w * s))
|
1264 |
+
|
1265 |
+
# box
|
1266 |
+
xmin = max(0, random.randint(0, w) - mask_w // 2)
|
1267 |
+
ymin = max(0, random.randint(0, h) - mask_h // 2)
|
1268 |
+
xmax = min(w, xmin + mask_w)
|
1269 |
+
ymax = min(h, ymin + mask_h)
|
1270 |
+
|
1271 |
+
# apply random color mask
|
1272 |
+
image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
|
1273 |
+
|
1274 |
+
# return unobscured labels
|
1275 |
+
if len(labels) and s > 0.03:
|
1276 |
+
box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
|
1277 |
+
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
|
1278 |
+
labels = labels[ioa < 0.60] # remove >60% obscured labels
|
1279 |
+
|
1280 |
+
return labels
|
1281 |
+
|
1282 |
+
|
1283 |
+
def create_folder(path='./new'):
|
1284 |
+
# Create folder
|
1285 |
+
if os.path.exists(path):
|
1286 |
+
shutil.rmtree(path) # delete output folder
|
1287 |
+
os.makedirs(path) # make new output folder
|
1288 |
+
|
1289 |
+
|
1290 |
+
def flatten_recursive(path='../coco128'):
|
1291 |
+
# Flatten a recursive directory by bringing all files to top level
|
1292 |
+
new_path = Path(path + '_flat')
|
1293 |
+
create_folder(new_path)
|
1294 |
+
for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
|
1295 |
+
shutil.copyfile(file, new_path / Path(file).name)
|
1296 |
+
|
1297 |
+
|
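Note on the label math in __getitem__ above: the normalized [class, x, y, w, h] labels are mapped to pixel xyxy coordinates using the (ratio, pad) pair returned by letterbox. A minimal, self-contained sketch of that conversion, using hypothetical values (a 640x480 resized image letterboxed to 640x640; not part of the commit itself):
import numpy as np
x = np.array([[0, 0.5, 0.5, 0.2, 0.4]], dtype=np.float32)  # one label: class, xc, yc, w, h (normalized)
h, w = 480, 640                        # resized image height/width before letterboxing
ratio, pad = (1.0, 1.0), (0.0, 80.0)   # width/height ratios and wh padding as returned by letterbox
labels = x.copy()
labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # x1 = 256
labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # y1 = 224
labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]  # x2 = 384
labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]  # y2 = 416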
asone/detectors/yolor/utils/export.py
ADDED
@@ -0,0 +1,80 @@
1 |
+
import argparse
|
2 |
+
|
3 |
+
import torch
|
4 |
+
from asone.detectors.yolor.models.models import *
|
5 |
+
from asone.detectors.yolor.utils.google_utils import attempt_download
|
6 |
+
|
7 |
+
if __name__ == '__main__':
|
8 |
+
parser = argparse.ArgumentParser()
|
9 |
+
parser.add_argument('--weights', type=str, default='./yolov4.pt', help='weights path')
|
10 |
+
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
|
11 |
+
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
|
12 |
+
parser.add_argument('--cfg', type=str, default='cfg/yolor_p6.cfg', help='*.cfg path')
|
13 |
+
opt = parser.parse_args()
|
14 |
+
opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
|
15 |
+
# print(opt)
|
16 |
+
|
17 |
+
# Input
|
18 |
+
img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection
|
19 |
+
|
20 |
+
# Load PyTorch model
|
21 |
+
attempt_download(opt.weights)
|
22 |
+
# print(ad)
|
23 |
+
device = torch.device('cpu')  # export runs on CPU; the dummy input img above is on CPU
|
24 |
+
model = Darknet(opt.cfg, opt.img_size)  # build the YOLOR model from its cfg before loading weights
|
25 |
+
model.load_state_dict(torch.load(opt.weights, map_location=device)['model'])
|
26 |
+
model.to(device)
|
27 |
+
|
28 |
+
model.eval()
|
29 |
+
if hasattr(model, 'model'): model.model[-1].export = True # set Detect() layer export=True (yolov5-style models only)
|
30 |
+
y = model(img) # dry run
|
31 |
+
|
32 |
+
# print("-------------------")
|
33 |
+
# model = Darknet(cfg, imgsz).cuda()
|
34 |
+
# model.load_state_dict(torch.load(weights[0], map_location=device)['model'])
|
35 |
+
#model = attempt_load(weights, map_location=device) # load FP32 model
|
36 |
+
#imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
|
37 |
+
# model.to(device).eval()
|
38 |
+
# TorchScript export
|
39 |
+
try:
|
40 |
+
print('\nStarting TorchScript export with torch %s...' % torch.__version__)
|
41 |
+
f = opt.weights.replace('.pt', '.torchscript.pt') # filename
|
42 |
+
ts = torch.jit.trace(model, img)
|
43 |
+
ts.save(f)
|
44 |
+
print('TorchScript export success, saved as %s' % f)
|
45 |
+
except Exception as e:
|
46 |
+
print('TorchScript export failure: %s' % e)
|
47 |
+
|
48 |
+
# ONNX export
|
49 |
+
try:
|
50 |
+
import onnx
|
51 |
+
|
52 |
+
print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
|
53 |
+
f = opt.weights.replace('.pt', '.onnx') # filename
|
54 |
+
model.fuse() # only for ONNX
|
55 |
+
torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
|
56 |
+
output_names=['classes', 'boxes'] if y is None else ['output'])
|
57 |
+
|
58 |
+
# Checks
|
59 |
+
onnx_model = onnx.load(f) # load onnx model
|
60 |
+
onnx.checker.check_model(onnx_model) # check onnx model
|
61 |
+
print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
|
62 |
+
print('ONNX export success, saved as %s' % f)
|
63 |
+
except Exception as e:
|
64 |
+
print('ONNX export failure: %s' % e)
|
65 |
+
|
66 |
+
# CoreML export
|
67 |
+
try:
|
68 |
+
import coremltools as ct
|
69 |
+
|
70 |
+
print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
|
71 |
+
# convert model from torchscript and apply pixel scaling as per detect.py
|
72 |
+
model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
|
73 |
+
f = opt.weights.replace('.pt', '.mlmodel') # filename
|
74 |
+
model.save(f)
|
75 |
+
print('CoreML export success, saved as %s' % f)
|
76 |
+
except Exception as e:
|
77 |
+
print('CoreML export failure: %s' % e)
|
78 |
+
|
79 |
+
# Finish
|
80 |
+
print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
|
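The ONNX sanity check used in export.py above (onnx.load followed by onnx.checker.check_model) also works as a standalone step against any previously exported file; a minimal sketch with a placeholder filename:
import onnx
onnx_model = onnx.load('yolor_p6.onnx')               # placeholder path to an exported model
onnx.checker.check_model(onnx_model)                  # raises if the graph is malformed
print(onnx.helper.printable_graph(onnx_model.graph))  # human-readable graph summary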
asone/detectors/yolor/utils/general.py
ADDED
@@ -0,0 +1,449 @@
1 |
+
# General utils
|
2 |
+
|
3 |
+
import glob
|
4 |
+
import logging
|
5 |
+
import math
|
6 |
+
import os
|
7 |
+
import platform
|
8 |
+
import random
|
9 |
+
import re
|
10 |
+
import subprocess
|
11 |
+
import time
|
12 |
+
from pathlib import Path
|
13 |
+
|
14 |
+
import cv2
|
15 |
+
import matplotlib
|
16 |
+
import numpy as np
|
17 |
+
import torch
|
18 |
+
import yaml
|
19 |
+
|
20 |
+
from asone.detectors.yolor.utils.google_utils import gsutil_getsize
|
21 |
+
from asone.detectors.yolor.utils.metrics import fitness
|
22 |
+
from asone.detectors.yolor.utils.torch_utils import init_torch_seeds
|
23 |
+
|
24 |
+
# Set printoptions
|
25 |
+
torch.set_printoptions(linewidth=320, precision=5, profile='long')
|
26 |
+
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
|
27 |
+
matplotlib.rc('font', **{'size': 11})
|
28 |
+
|
29 |
+
# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
|
30 |
+
cv2.setNumThreads(0)
|
31 |
+
|
32 |
+
|
33 |
+
def set_logging(rank=-1):
|
34 |
+
logging.basicConfig(
|
35 |
+
format="%(message)s",
|
36 |
+
level=logging.INFO if rank in [-1, 0] else logging.WARN)
|
37 |
+
|
38 |
+
|
39 |
+
def init_seeds(seed=0):
|
40 |
+
random.seed(seed)
|
41 |
+
np.random.seed(seed)
|
42 |
+
init_torch_seeds(seed)
|
43 |
+
|
44 |
+
|
45 |
+
def get_latest_run(search_dir='.'):
|
46 |
+
# Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
|
47 |
+
last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
|
48 |
+
return max(last_list, key=os.path.getctime) if last_list else ''
|
49 |
+
|
50 |
+
|
51 |
+
def check_git_status():
|
52 |
+
# Suggest 'git pull' if repo is out of date
|
53 |
+
if platform.system() in ['Linux', 'Darwin'] and not os.path.isfile('/.dockerenv'):
|
54 |
+
s = subprocess.check_output('if [ -d .git ]; then git fetch && git status -uno; fi', shell=True).decode('utf-8')
|
55 |
+
if 'Your branch is behind' in s:
|
56 |
+
print(s[s.find('Your branch is behind'):s.find('\n\n')] + '\n')
|
57 |
+
|
58 |
+
|
59 |
+
def check_img_size(img_size, s=32):
|
60 |
+
# Verify img_size is a multiple of stride s
|
61 |
+
new_size = make_divisible(img_size, int(s)) # ceil gs-multiple
|
62 |
+
if new_size != img_size:
|
63 |
+
print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
|
64 |
+
return new_size
|
65 |
+
|
66 |
+
|
67 |
+
def check_file(file):
|
68 |
+
# Search for file if not found
|
69 |
+
if os.path.isfile(file) or file == '':
|
70 |
+
return file
|
71 |
+
else:
|
72 |
+
files = glob.glob('./**/' + file, recursive=True) # find file
|
73 |
+
assert len(files), 'File Not Found: %s' % file # assert file was found
|
74 |
+
assert len(files) == 1, "Multiple files match '%s', specify exact path: %s" % (file, files) # assert unique
|
75 |
+
return files[0] # return file
|
76 |
+
|
77 |
+
|
78 |
+
def check_dataset(dict):
|
79 |
+
# Download dataset if not found locally
|
80 |
+
val, s = dict.get('val'), dict.get('download')
|
81 |
+
if val and len(val):
|
82 |
+
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
|
83 |
+
if not all(x.exists() for x in val):
|
84 |
+
print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()])
|
85 |
+
if s and len(s): # download script
|
86 |
+
print('Downloading %s ...' % s)
|
87 |
+
if s.startswith('http') and s.endswith('.zip'): # URL
|
88 |
+
f = Path(s).name # filename
|
89 |
+
torch.hub.download_url_to_file(s, f)
|
90 |
+
r = os.system('unzip -q %s -d ../ && rm %s' % (f, f)) # unzip
|
91 |
+
else: # bash script
|
92 |
+
r = os.system(s)
|
93 |
+
print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure')) # analyze return value
|
94 |
+
else:
|
95 |
+
raise Exception('Dataset not found.')
|
96 |
+
|
97 |
+
|
98 |
+
def make_divisible(x, divisor):
|
99 |
+
# Returns x evenly divisible by divisor
|
100 |
+
return math.ceil(x / divisor) * divisor
|
101 |
+
|
102 |
+
|
103 |
+
def labels_to_class_weights(labels, nc=80):
|
104 |
+
# Get class weights (inverse frequency) from training labels
|
105 |
+
if labels[0] is None: # no labels loaded
|
106 |
+
return torch.Tensor()
|
107 |
+
|
108 |
+
labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO
|
109 |
+
classes = labels[:, 0].astype(np.int) # labels = [class xywh]
|
110 |
+
weights = np.bincount(classes, minlength=nc) # occurrences per class
|
111 |
+
|
112 |
+
# Prepend gridpoint count (for uCE training)
|
113 |
+
# gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image
|
114 |
+
# weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start
|
115 |
+
|
116 |
+
weights[weights == 0] = 1 # replace empty bins with 1
|
117 |
+
weights = 1 / weights # number of targets per class
|
118 |
+
weights /= weights.sum() # normalize
|
119 |
+
return torch.from_numpy(weights)
|
120 |
+
|
121 |
+
|
122 |
+
def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
|
123 |
+
# Produces image weights based on class mAPs
|
124 |
+
n = len(labels)
|
125 |
+
class_counts = np.array([np.bincount(labels[i][:, 0].astype(np.int), minlength=nc) for i in range(n)])
|
126 |
+
image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
|
127 |
+
# index = random.choices(range(n), weights=image_weights, k=1) # weight image sample
|
128 |
+
return image_weights
|
129 |
+
|
130 |
+
|
131 |
+
def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
|
132 |
+
# https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
|
133 |
+
# a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
|
134 |
+
# b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
|
135 |
+
# x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
|
136 |
+
# x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet
|
137 |
+
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
|
138 |
+
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
139 |
+
64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
|
140 |
+
return x
|
141 |
+
|
142 |
+
|
143 |
+
def xyxy2xywh(x):
|
144 |
+
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
|
145 |
+
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
146 |
+
y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
|
147 |
+
y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
|
148 |
+
y[:, 2] = x[:, 2] - x[:, 0] # width
|
149 |
+
y[:, 3] = x[:, 3] - x[:, 1] # height
|
150 |
+
return y
|
151 |
+
|
152 |
+
|
153 |
+
def xywh2xyxy(x):
|
154 |
+
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
|
155 |
+
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
156 |
+
y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
|
157 |
+
y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
|
158 |
+
y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
|
159 |
+
y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
|
160 |
+
return y
|
161 |
+
|
162 |
+
|
163 |
+
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
|
164 |
+
# Rescale coords (xyxy) from img1_shape to img0_shape
|
165 |
+
if ratio_pad is None: # calculate from img0_shape
|
166 |
+
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
|
167 |
+
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
|
168 |
+
else:
|
169 |
+
gain = ratio_pad[0][0]
|
170 |
+
pad = ratio_pad[1]
|
171 |
+
|
172 |
+
coords[:, [0, 2]] -= pad[0] # x padding
|
173 |
+
coords[:, [1, 3]] -= pad[1] # y padding
|
174 |
+
coords[:, :4] /= gain
|
175 |
+
clip_coords(coords, img0_shape)
|
176 |
+
return coords
|
177 |
+
|
178 |
+
|
179 |
+
def clip_coords(boxes, img_shape):
|
180 |
+
# Clip bounding xyxy bounding boxes to image shape (height, width)
|
181 |
+
boxes[:, 0].clamp_(0, img_shape[1]) # x1
|
182 |
+
boxes[:, 1].clamp_(0, img_shape[0]) # y1
|
183 |
+
boxes[:, 2].clamp_(0, img_shape[1]) # x2
|
184 |
+
boxes[:, 3].clamp_(0, img_shape[0]) # y2
|
185 |
+
|
186 |
+
|
187 |
+
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, EIoU=False, ECIoU=False, eps=1e-9):
|
188 |
+
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
|
189 |
+
box2 = box2.T
|
190 |
+
|
191 |
+
# Get the coordinates of bounding boxes
|
192 |
+
if x1y1x2y2: # x1, y1, x2, y2 = box1
|
193 |
+
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
|
194 |
+
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
|
195 |
+
else: # transform from xywh to xyxy
|
196 |
+
b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
|
197 |
+
b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
|
198 |
+
b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
|
199 |
+
b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
|
200 |
+
|
201 |
+
# Intersection area
|
202 |
+
inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
|
203 |
+
(torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
|
204 |
+
|
205 |
+
# Union Area
|
206 |
+
w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
|
207 |
+
w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
|
208 |
+
union = w1 * h1 + w2 * h2 - inter + eps
|
209 |
+
|
210 |
+
iou = inter / union
|
211 |
+
if GIoU or DIoU or CIoU or EIoU or ECIoU:
|
212 |
+
cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width
|
213 |
+
ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height
|
214 |
+
if CIoU or DIoU or EIoU or ECIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
|
215 |
+
c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared
|
216 |
+
rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
|
217 |
+
(b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared
|
218 |
+
if DIoU:
|
219 |
+
return iou - rho2 / c2 # DIoU
|
220 |
+
elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
|
221 |
+
v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
|
222 |
+
with torch.no_grad():
|
223 |
+
alpha = v / ((1 + eps) - iou + v)
|
224 |
+
return iou - (rho2 / c2 + v * alpha) # CIoU
|
225 |
+
elif EIoU: # Efficient IoU https://arxiv.org/abs/2101.08158
|
226 |
+
rho3 = (w1-w2) **2
|
227 |
+
c3 = cw ** 2 + eps
|
228 |
+
rho4 = (h1-h2) **2
|
229 |
+
c4 = ch ** 2 + eps
|
230 |
+
return iou - rho2 / c2 - rho3 / c3 - rho4 / c4 # EIoU
|
231 |
+
elif ECIoU:
|
232 |
+
v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
|
233 |
+
with torch.no_grad():
|
234 |
+
alpha = v / ((1 + eps) - iou + v)
|
235 |
+
rho3 = (w1-w2) **2
|
236 |
+
c3 = cw ** 2 + eps
|
237 |
+
rho4 = (h1-h2) **2
|
238 |
+
c4 = ch ** 2 + eps
|
239 |
+
return iou - v * alpha - rho2 / c2 - rho3 / c3 - rho4 / c4 # ECIoU
|
240 |
+
else: # GIoU https://arxiv.org/pdf/1902.09630.pdf
|
241 |
+
c_area = cw * ch + eps # convex area
|
242 |
+
return iou - (c_area - union) / c_area # GIoU
|
243 |
+
else:
|
244 |
+
return iou # IoU
|
245 |
+
|
246 |
+
|
247 |
+
def box_iou(box1, box2):
|
248 |
+
# https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
|
249 |
+
"""
|
250 |
+
Return intersection-over-union (Jaccard index) of boxes.
|
251 |
+
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
|
252 |
+
Arguments:
|
253 |
+
box1 (Tensor[N, 4])
|
254 |
+
box2 (Tensor[M, 4])
|
255 |
+
Returns:
|
256 |
+
iou (Tensor[N, M]): the NxM matrix containing the pairwise
|
257 |
+
IoU values for every element in boxes1 and boxes2
|
258 |
+
"""
|
259 |
+
|
260 |
+
def box_area(box):
|
261 |
+
# box = 4xn
|
262 |
+
return (box[2] - box[0]) * (box[3] - box[1])
|
263 |
+
|
264 |
+
area1 = box_area(box1.T)
|
265 |
+
area2 = box_area(box2.T)
|
266 |
+
|
267 |
+
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
|
268 |
+
inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
|
269 |
+
return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
|
270 |
+
|
271 |
+
|
272 |
+
def wh_iou(wh1, wh2):
|
273 |
+
# Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
|
274 |
+
wh1 = wh1[:, None] # [N,1,2]
|
275 |
+
wh2 = wh2[None] # [1,M,2]
|
276 |
+
inter = torch.min(wh1, wh2).prod(2) # [N,M]
|
277 |
+
return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter)
|
278 |
+
|
279 |
+
|
280 |
+
def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False):
|
281 |
+
"""Performs Non-Maximum Suppression (NMS) on inference results
|
282 |
+
|
283 |
+
Returns:
|
284 |
+
detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
|
285 |
+
"""
|
286 |
+
|
287 |
+
nc = prediction[0].shape[1] - 5 # number of classes
|
288 |
+
xc = prediction[..., 4] > conf_thres # candidates
|
289 |
+
|
290 |
+
# Settings
|
291 |
+
min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
|
292 |
+
max_det = 300 # maximum number of detections per image
|
293 |
+
time_limit = 10.0 # seconds to quit after
|
294 |
+
redundant = True # require redundant detections
|
295 |
+
multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
|
296 |
+
|
297 |
+
t = time.time()
|
298 |
+
output = [torch.zeros(0, 6)] * prediction.shape[0]
|
299 |
+
for xi, x in enumerate(prediction): # image index, image inference
|
300 |
+
# Apply constraints
|
301 |
+
# x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
|
302 |
+
x = x[xc[xi]] # confidence
|
303 |
+
|
304 |
+
# If none remain process next image
|
305 |
+
if not x.shape[0]:
|
306 |
+
continue
|
307 |
+
|
308 |
+
# Compute conf
|
309 |
+
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
|
310 |
+
|
311 |
+
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
|
312 |
+
box = xywh2xyxy(x[:, :4])
|
313 |
+
|
314 |
+
# Detections matrix nx6 (xyxy, conf, cls)
|
315 |
+
if multi_label:
|
316 |
+
i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
|
317 |
+
x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
|
318 |
+
else: # best class only
|
319 |
+
conf, j = x[:, 5:].max(1, keepdim=True)
|
320 |
+
x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
|
321 |
+
|
322 |
+
# Filter by class
|
323 |
+
if classes:
|
324 |
+
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
|
325 |
+
|
326 |
+
# Apply finite constraint
|
327 |
+
# if not torch.isfinite(x).all():
|
328 |
+
# x = x[torch.isfinite(x).all(1)]
|
329 |
+
|
330 |
+
# If none remain process next image
|
331 |
+
n = x.shape[0] # number of boxes
|
332 |
+
if not n:
|
333 |
+
continue
|
334 |
+
|
335 |
+
# Sort by confidence
|
336 |
+
# x = x[x[:, 4].argsort(descending=True)]
|
337 |
+
|
338 |
+
# Batched NMS
|
339 |
+
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
|
340 |
+
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
|
341 |
+
i = torch.ops.torchvision.nms(boxes, scores, iou_thres)
|
342 |
+
if i.shape[0] > max_det: # limit detections
|
343 |
+
            i = i[:max_det]
            if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
                # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output


def strip_optimizer(f='weights/best.pt', s=''):  # from utils.general import *; strip_optimizer()
    # Strip optimizer from 'f' to finalize training, optionally save as 's'
    x = torch.load(f, map_location=torch.device('cpu'))
    x['optimizer'] = None
    x['training_results'] = None
    x['epoch'] = -1
    # x['model'].half()  # to FP16
    # for p in x['model'].parameters():
    #     p.requires_grad = False
    torch.save(x, s or f)
    mb = os.path.getsize(s or f) / 1E6  # filesize
    print('Optimizer stripped from %s,%s %.1fMB' % (f, (' saved as %s,' % s) if s else '', mb))


def print_mutation(hyp, results, yaml_file='hyp_evolved.yaml', bucket=''):
    # Print mutation results to evolve.txt (for use with train.py --evolve)
    a = '%10s' * len(hyp) % tuple(hyp.keys())  # hyperparam keys
    b = '%10.3g' * len(hyp) % tuple(hyp.values())  # hyperparam values
    c = '%10.4g' * len(results) % results  # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3)
    print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))

    if bucket:
        url = 'gs://%s/evolve.txt' % bucket
        if gsutil_getsize(url) > (os.path.getsize('evolve.txt') if os.path.exists('evolve.txt') else 0):
            os.system('gsutil cp %s .' % url)  # download evolve.txt if larger than local

    with open('evolve.txt', 'a') as f:  # append result
        f.write(c + b + '\n')
    x = np.unique(np.loadtxt('evolve.txt', ndmin=2), axis=0)  # load unique rows
    x = x[np.argsort(-fitness(x))]  # sort
    np.savetxt('evolve.txt', x, '%10.3g')  # save sort by fitness

    # Save yaml
    for i, k in enumerate(hyp.keys()):
        hyp[k] = float(x[0, i + 7])
    with open(yaml_file, 'w') as f:
        results = tuple(x[0, :7])
        c = '%10.4g' * len(results) % results  # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3)
        f.write('# Hyperparameter Evolution Results\n# Generations: %g\n# Metrics: ' % len(x) + c + '\n\n')
        yaml.dump(hyp, f, sort_keys=False)

    if bucket:
        os.system('gsutil cp evolve.txt %s gs://%s' % (yaml_file, bucket))  # upload


def apply_classifier(x, model, img, im0):
    # applies a second stage classifier to yolo outputs
    im0 = [im0] if isinstance(im0, np.ndarray) else im0
    for i, d in enumerate(x):  # per image
        if d is not None and len(d):
            d = d.clone()

            # Reshape and pad cutouts
            b = xyxy2xywh(d[:, :4])  # boxes
            b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # rectangle to square
            b[:, 2:] = b[:, 2:] * 1.3 + 30  # pad
            d[:, :4] = xywh2xyxy(b).long()

            # Rescale boxes from img_size to im0 size
            scale_coords(img.shape[2:], d[:, :4], im0[i].shape)

            # Classes
            pred_cls1 = d[:, 5].long()
            ims = []
            for j, a in enumerate(d):  # per item
                cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
                im = cv2.resize(cutout, (224, 224))  # BGR
                # cv2.imwrite('test%i.jpg' % j, cutout)

                im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
                im = np.ascontiguousarray(im, dtype=np.float32)  # uint8 to float32
                im /= 255.0  # 0 - 255 to 0.0 - 1.0
                ims.append(im)

            pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
            x[i] = x[i][pred_cls1 == pred_cls2]  # retain matching class detections

    return x


def increment_path(path, exist_ok=True, sep=''):
    # Increment path, i.e. runs/exp --> runs/exp{sep}0, runs/exp{sep}1 etc.
    path = Path(path)  # os-agnostic
    if (path.exists() and exist_ok) or (not path.exists()):
        return str(path)
    else:
        dirs = glob.glob(f"{path}{sep}*")  # similar paths
        matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs]
        i = [int(m.groups()[0]) for m in matches if m]  # indices
        n = max(i) + 1 if i else 2  # increment number
        return f"{path}{sep}{n}"  # update path
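Note: the snippet below is an illustrative sketch, not part of the commit. It assumes increment_path() from the file above is importable, and the runs/exp path is made up for the example.

# Sketch only: pick a fresh, non-clobbering results directory with increment_path().
from pathlib import Path

save_dir = increment_path('runs/exp', exist_ok=False)  # 'runs/exp' first, then 'runs/exp2', 'runs/exp3', ...
Path(save_dir).mkdir(parents=True, exist_ok=True)
print('saving results to', save_dir)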
asone/detectors/yolor/utils/google_utils.py
ADDED
@@ -0,0 +1,132 @@
# Google utils: https://cloud.google.com/storage/docs/reference/libraries

import os
import platform
import subprocess
import time
from pathlib import Path

import torch
import torch.nn as nn


def gsutil_getsize(url=''):
    # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
    s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8')
    return eval(s.split(' ')[0]) if len(s) else 0  # bytes


def attempt_download(weights):
    # Attempt to download pretrained weights if not found locally
    weights = weights.strip().replace("'", '')
    file = Path(weights).name

    msg = weights + ' missing, try downloading from https://github.com/WongKinYiu/yolor/releases/'
    models = ['yolor_p6.pt', 'yolor_w6.pt']  # available models

    if file in models and not os.path.isfile(weights):

        try:  # GitHub
            url = 'https://github.com/WongKinYiu/yolor/releases/download/v1.0/' + file
            print('Downloading %s to %s...' % (url, weights))
            torch.hub.download_url_to_file(url, weights)
            assert os.path.exists(weights) and os.path.getsize(weights) > 1E6  # check
        except Exception as e:  # GCP
            print('ERROR: Download failure.')
            print('')


def attempt_load(weights, map_location=None):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        attempt_download(w)
        model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval())  # load FP32 model

    if len(model) == 1:
        return model[-1]  # return model
    else:
        print('Ensemble created with %s\n' % weights)
        for k in ['names', 'stride']:
            setattr(model, k, getattr(model[-1], k))
        return model  # return ensemble


def gdrive_download(id='1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', name='coco128.zip'):
    # Downloads a file from Google Drive. from utils.google_utils import *; gdrive_download()
    t = time.time()

    print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
    os.remove(name) if os.path.exists(name) else None  # remove existing
    os.remove('cookie') if os.path.exists('cookie') else None

    # Attempt file download
    out = "NUL" if platform.system() == "Windows" else "/dev/null"
    os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out))
    if os.path.exists('cookie'):  # large file
        s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name)
    else:  # small file
        s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id)
    r = os.system(s)  # execute, capture return
    os.remove('cookie') if os.path.exists('cookie') else None

    # Error check
    if r != 0:
        os.remove(name) if os.path.exists(name) else None  # remove partial
        print('Download error ')  # raise Exception('Download error')
        return r

    # Unzip if archive
    if name.endswith('.zip'):
        print('unzipping... ', end='')
        os.system('unzip -q %s' % name)  # unzip
        os.remove(name)  # remove zip to free space

    print('Done (%.1fs)' % (time.time() - t))
    return r


def get_token(cookie="./cookie"):
    with open(cookie) as f:
        for line in f:
            if "download" in line:
                return line.split()[-1]
    return ""

class Ensemble(nn.ModuleList):
    # Ensemble of models
    def __init__(self):
        super().__init__()

    def forward(self, x, augment=False, profile=False, visualize=False):
        y = [module(x, augment, profile, visualize)[0] for module in self]
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output
# def upload_blob(bucket_name, source_file_name, destination_blob_name):
#     # Uploads a file to a bucket
#     # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
#
#     storage_client = storage.Client()
#     bucket = storage_client.get_bucket(bucket_name)
#     blob = bucket.blob(destination_blob_name)
#
#     blob.upload_from_filename(source_file_name)
#
#     print('File {} uploaded to {}.'.format(
#         source_file_name,
#         destination_blob_name))
#
#
# def download_blob(bucket_name, source_blob_name, destination_file_name):
#     # Uploads a blob from a bucket
#     storage_client = storage.Client()
#     bucket = storage_client.get_bucket(bucket_name)
#     blob = bucket.blob(source_blob_name)
#
#     blob.download_to_filename(destination_file_name)
#
#     print('Blob {} downloaded to {}.'.format(
#         source_blob_name,
#         destination_file_name))
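Note: the snippet below is an illustrative sketch, not part of the commit. It assumes attempt_download() and attempt_load() from the file above are importable and that the named checkpoint exists in the YOLOR GitHub release.

# Sketch only: fetch YOLOR weights if they are not on disk, then load a single (non-ensemble) model.
import torch

weights = 'yolor_p6.pt'
attempt_download(weights)  # downloads from the YOLOR release when the file is missing
model = attempt_load(weights, map_location=torch.device('cpu'))  # fused FP32 model in eval mode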
asone/detectors/yolor/utils/layers.py
ADDED
@@ -0,0 +1,532 @@
1 |
+
from asone.detectors.yolor.utils.general import *
|
2 |
+
|
3 |
+
import torch
|
4 |
+
from torch import nn
|
5 |
+
import torch.nn.functional as F
|
6 |
+
try:
|
7 |
+
from mish_cuda import MishCuda as Mish
|
8 |
+
|
9 |
+
except:
|
10 |
+
class Mish(nn.Module): # https://github.com/digantamisra98/Mish
|
11 |
+
def forward(self, x):
|
12 |
+
return x * F.softplus(x).tanh()
|
13 |
+
|
14 |
+
try:
|
15 |
+
from pytorch_wavelets import DWTForward, DWTInverse
|
16 |
+
|
17 |
+
class DWT(nn.Module):
|
18 |
+
def __init__(self):
|
19 |
+
super(DWT, self).__init__()
|
20 |
+
self.xfm = DWTForward(J=1, wave='db1', mode='zero')
|
21 |
+
|
22 |
+
def forward(self, x):
|
23 |
+
b,c,w,h = x.shape
|
24 |
+
yl, yh = self.xfm(x)
|
25 |
+
return torch.cat([yl/2., yh[0].view(b,-1,w//2,h//2)/2.+.5], 1)
|
26 |
+
|
27 |
+
except: # using Reorg instead
|
28 |
+
class DWT(nn.Module):
|
29 |
+
def forward(self, x):
|
30 |
+
return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
|
31 |
+
|
32 |
+
|
33 |
+
class Reorg(nn.Module):
|
34 |
+
def forward(self, x):
|
35 |
+
return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
|
36 |
+
|
37 |
+
|
38 |
+
def make_divisible(v, divisor):
|
39 |
+
# Function ensures all layers have a channel number that is divisible by 8
|
40 |
+
# https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
|
41 |
+
return math.ceil(v / divisor) * divisor
|
42 |
+
|
43 |
+
|
44 |
+
class Flatten(nn.Module):
|
45 |
+
# Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
|
46 |
+
def forward(self, x):
|
47 |
+
return x.view(x.size(0), -1)
|
48 |
+
|
49 |
+
|
50 |
+
class Concat(nn.Module):
|
51 |
+
# Concatenate a list of tensors along dimension
|
52 |
+
def __init__(self, dimension=1):
|
53 |
+
super(Concat, self).__init__()
|
54 |
+
self.d = dimension
|
55 |
+
|
56 |
+
def forward(self, x):
|
57 |
+
return torch.cat(x, self.d)
|
58 |
+
|
59 |
+
|
60 |
+
class FeatureConcat(nn.Module):
|
61 |
+
def __init__(self, layers):
|
62 |
+
super(FeatureConcat, self).__init__()
|
63 |
+
self.layers = layers # layer indices
|
64 |
+
self.multiple = len(layers) > 1 # multiple layers flag
|
65 |
+
|
66 |
+
def forward(self, x, outputs):
|
67 |
+
return torch.cat([outputs[i] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]]
|
68 |
+
|
69 |
+
|
70 |
+
class FeatureConcat2(nn.Module):
|
71 |
+
def __init__(self, layers):
|
72 |
+
super(FeatureConcat2, self).__init__()
|
73 |
+
self.layers = layers # layer indices
|
74 |
+
self.multiple = len(layers) > 1 # multiple layers flag
|
75 |
+
|
76 |
+
def forward(self, x, outputs):
|
77 |
+
return torch.cat([outputs[self.layers[0]], outputs[self.layers[1]].detach()], 1)
|
78 |
+
|
79 |
+
|
80 |
+
class FeatureConcat3(nn.Module):
|
81 |
+
def __init__(self, layers):
|
82 |
+
super(FeatureConcat3, self).__init__()
|
83 |
+
self.layers = layers # layer indices
|
84 |
+
self.multiple = len(layers) > 1 # multiple layers flag
|
85 |
+
|
86 |
+
def forward(self, x, outputs):
|
87 |
+
return torch.cat([outputs[self.layers[0]], outputs[self.layers[1]].detach(), outputs[self.layers[2]].detach()], 1)
|
88 |
+
|
89 |
+
|
90 |
+
class FeatureConcat_l(nn.Module):
|
91 |
+
def __init__(self, layers):
|
92 |
+
super(FeatureConcat_l, self).__init__()
|
93 |
+
self.layers = layers # layer indices
|
94 |
+
self.multiple = len(layers) > 1 # multiple layers flag
|
95 |
+
|
96 |
+
def forward(self, x, outputs):
|
97 |
+
return torch.cat([outputs[i][:,:outputs[i].shape[1]//2,:,:] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]][:,:outputs[self.layers[0]].shape[1]//2,:,:]
|
98 |
+
|
99 |
+
|
100 |
+
class WeightedFeatureFusion(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
101 |
+
def __init__(self, layers, weight=False):
|
102 |
+
super(WeightedFeatureFusion, self).__init__()
|
103 |
+
self.layers = layers # layer indices
|
104 |
+
self.weight = weight # apply weights boolean
|
105 |
+
self.n = len(layers) + 1 # number of layers
|
106 |
+
if weight:
|
107 |
+
self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True) # layer weights
|
108 |
+
|
109 |
+
def forward(self, x, outputs):
|
110 |
+
# Weights
|
111 |
+
if self.weight:
|
112 |
+
w = torch.sigmoid(self.w) * (2 / self.n) # sigmoid weights (0-1)
|
113 |
+
x = x * w[0]
|
114 |
+
|
115 |
+
# Fusion
|
116 |
+
nx = x.shape[1] # input channels
|
117 |
+
for i in range(self.n - 1):
|
118 |
+
a = outputs[self.layers[i]] * w[i + 1] if self.weight else outputs[self.layers[i]] # feature to add
|
119 |
+
na = a.shape[1] # feature channels
|
120 |
+
|
121 |
+
# Adjust channels
|
122 |
+
if nx == na: # same shape
|
123 |
+
x = x + a
|
124 |
+
elif nx > na: # slice input
|
125 |
+
x[:, :na] = x[:, :na] + a # or a = nn.ZeroPad2d((0, 0, 0, 0, 0, dc))(a); x = x + a
|
126 |
+
else: # slice feature
|
127 |
+
x = x + a[:, :nx]
|
128 |
+
|
129 |
+
return x
|
130 |
+
|
131 |
+
|
132 |
+
class MixConv2d(nn.Module): # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595
|
133 |
+
def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'):
|
134 |
+
super(MixConv2d, self).__init__()
|
135 |
+
|
136 |
+
groups = len(k)
|
137 |
+
if method == 'equal_ch': # equal channels per group
|
138 |
+
i = torch.linspace(0, groups - 1E-6, out_ch).floor() # out_ch indices
|
139 |
+
ch = [(i == g).sum() for g in range(groups)]
|
140 |
+
else: # 'equal_params': equal parameter count per group
|
141 |
+
b = [out_ch] + [0] * groups
|
142 |
+
a = np.eye(groups + 1, groups, k=-1)
|
143 |
+
a -= np.roll(a, 1, axis=1)
|
144 |
+
a *= np.array(k) ** 2
|
145 |
+
a[0] = 1
|
146 |
+
ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int) # solve for equal weight indices, ax = b
|
147 |
+
|
148 |
+
self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch,
|
149 |
+
out_channels=ch[g],
|
150 |
+
kernel_size=k[g],
|
151 |
+
stride=stride,
|
152 |
+
padding=k[g] // 2, # 'same' pad
|
153 |
+
dilation=dilation,
|
154 |
+
bias=bias) for g in range(groups)])
|
155 |
+
|
156 |
+
def forward(self, x):
|
157 |
+
return torch.cat([m(x) for m in self.m], 1)
|
158 |
+
|
159 |
+
|
160 |
+
# Activation functions below -------------------------------------------------------------------------------------------
|
161 |
+
class SwishImplementation(torch.autograd.Function):
|
162 |
+
@staticmethod
|
163 |
+
def forward(ctx, x):
|
164 |
+
ctx.save_for_backward(x)
|
165 |
+
return x * torch.sigmoid(x)
|
166 |
+
|
167 |
+
@staticmethod
|
168 |
+
def backward(ctx, grad_output):
|
169 |
+
x = ctx.saved_tensors[0]
|
170 |
+
sx = torch.sigmoid(x) # sigmoid(ctx)
|
171 |
+
return grad_output * (sx * (1 + x * (1 - sx)))
|
172 |
+
|
173 |
+
|
174 |
+
class MishImplementation(torch.autograd.Function):
|
175 |
+
@staticmethod
|
176 |
+
def forward(ctx, x):
|
177 |
+
ctx.save_for_backward(x)
|
178 |
+
return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
|
179 |
+
|
180 |
+
@staticmethod
|
181 |
+
def backward(ctx, grad_output):
|
182 |
+
x = ctx.saved_tensors[0]
|
183 |
+
sx = torch.sigmoid(x)
|
184 |
+
fx = F.softplus(x).tanh()
|
185 |
+
return grad_output * (fx + x * sx * (1 - fx * fx))
|
186 |
+
|
187 |
+
|
188 |
+
class MemoryEfficientSwish(nn.Module):
|
189 |
+
def forward(self, x):
|
190 |
+
return SwishImplementation.apply(x)
|
191 |
+
|
192 |
+
|
193 |
+
class MemoryEfficientMish(nn.Module):
|
194 |
+
def forward(self, x):
|
195 |
+
return MishImplementation.apply(x)
|
196 |
+
|
197 |
+
|
198 |
+
class Swish(nn.Module):
|
199 |
+
def forward(self, x):
|
200 |
+
return x * torch.sigmoid(x)
|
201 |
+
|
202 |
+
|
203 |
+
class HardSwish(nn.Module): # https://arxiv.org/pdf/1905.02244.pdf
|
204 |
+
def forward(self, x):
|
205 |
+
return x * F.hardtanh(x + 3, 0., 6., True) / 6.
|
206 |
+
|
207 |
+
|
208 |
+
class DeformConv2d(nn.Module):
|
209 |
+
def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, bias=None, modulation=False):
|
210 |
+
"""
|
211 |
+
Args:
|
212 |
+
modulation (bool, optional): If True, Modulated Defomable Convolution (Deformable ConvNets v2).
|
213 |
+
"""
|
214 |
+
super(DeformConv2d, self).__init__()
|
215 |
+
self.kernel_size = kernel_size
|
216 |
+
self.padding = padding
|
217 |
+
self.stride = stride
|
218 |
+
self.zero_padding = nn.ZeroPad2d(padding)
|
219 |
+
self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias)
|
220 |
+
|
221 |
+
self.p_conv = nn.Conv2d(inc, 2*kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride)
|
222 |
+
nn.init.constant_(self.p_conv.weight, 0)
|
223 |
+
self.p_conv.register_backward_hook(self._set_lr)
|
224 |
+
|
225 |
+
self.modulation = modulation
|
226 |
+
if modulation:
|
227 |
+
self.m_conv = nn.Conv2d(inc, kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride)
|
228 |
+
nn.init.constant_(self.m_conv.weight, 0)
|
229 |
+
self.m_conv.register_backward_hook(self._set_lr)
|
230 |
+
|
231 |
+
@staticmethod
|
232 |
+
def _set_lr(module, grad_input, grad_output):
|
233 |
+
grad_input = (grad_input[i] * 0.1 for i in range(len(grad_input)))
|
234 |
+
grad_output = (grad_output[i] * 0.1 for i in range(len(grad_output)))
|
235 |
+
|
236 |
+
def forward(self, x):
|
237 |
+
offset = self.p_conv(x)
|
238 |
+
if self.modulation:
|
239 |
+
m = torch.sigmoid(self.m_conv(x))
|
240 |
+
|
241 |
+
dtype = offset.data.type()
|
242 |
+
ks = self.kernel_size
|
243 |
+
N = offset.size(1) // 2
|
244 |
+
|
245 |
+
if self.padding:
|
246 |
+
x = self.zero_padding(x)
|
247 |
+
|
248 |
+
# (b, 2N, h, w)
|
249 |
+
p = self._get_p(offset, dtype)
|
250 |
+
|
251 |
+
# (b, h, w, 2N)
|
252 |
+
p = p.contiguous().permute(0, 2, 3, 1)
|
253 |
+
q_lt = p.detach().floor()
|
254 |
+
q_rb = q_lt + 1
|
255 |
+
|
256 |
+
q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long()
|
257 |
+
q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long()
|
258 |
+
q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], dim=-1)
|
259 |
+
q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], dim=-1)
|
260 |
+
|
261 |
+
# clip p
|
262 |
+
p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1)
|
263 |
+
|
264 |
+
# bilinear kernel (b, h, w, N)
|
265 |
+
g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:]))
|
266 |
+
g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:]))
|
267 |
+
g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:]))
|
268 |
+
g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:]))
|
269 |
+
|
270 |
+
# (b, c, h, w, N)
|
271 |
+
x_q_lt = self._get_x_q(x, q_lt, N)
|
272 |
+
x_q_rb = self._get_x_q(x, q_rb, N)
|
273 |
+
x_q_lb = self._get_x_q(x, q_lb, N)
|
274 |
+
x_q_rt = self._get_x_q(x, q_rt, N)
|
275 |
+
|
276 |
+
# (b, c, h, w, N)
|
277 |
+
x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \
|
278 |
+
g_rb.unsqueeze(dim=1) * x_q_rb + \
|
279 |
+
g_lb.unsqueeze(dim=1) * x_q_lb + \
|
280 |
+
g_rt.unsqueeze(dim=1) * x_q_rt
|
281 |
+
|
282 |
+
# modulation
|
283 |
+
if self.modulation:
|
284 |
+
m = m.contiguous().permute(0, 2, 3, 1)
|
285 |
+
m = m.unsqueeze(dim=1)
|
286 |
+
m = torch.cat([m for _ in range(x_offset.size(1))], dim=1)
|
287 |
+
x_offset *= m
|
288 |
+
|
289 |
+
x_offset = self._reshape_x_offset(x_offset, ks)
|
290 |
+
out = self.conv(x_offset)
|
291 |
+
|
292 |
+
return out
|
293 |
+
|
294 |
+
def _get_p_n(self, N, dtype):
|
295 |
+
p_n_x, p_n_y = torch.meshgrid(
|
296 |
+
torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1),
|
297 |
+
torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1))
|
298 |
+
# (2N, 1)
|
299 |
+
p_n = torch.cat([torch.flatten(p_n_x), torch.flatten(p_n_y)], 0)
|
300 |
+
p_n = p_n.view(1, 2*N, 1, 1).type(dtype)
|
301 |
+
|
302 |
+
return p_n
|
303 |
+
|
304 |
+
def _get_p_0(self, h, w, N, dtype):
|
305 |
+
p_0_x, p_0_y = torch.meshgrid(
|
306 |
+
torch.arange(1, h*self.stride+1, self.stride),
|
307 |
+
torch.arange(1, w*self.stride+1, self.stride))
|
308 |
+
p_0_x = torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1)
|
309 |
+
p_0_y = torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1)
|
310 |
+
p_0 = torch.cat([p_0_x, p_0_y], 1).type(dtype)
|
311 |
+
|
312 |
+
return p_0
|
313 |
+
|
314 |
+
def _get_p(self, offset, dtype):
|
315 |
+
N, h, w = offset.size(1)//2, offset.size(2), offset.size(3)
|
316 |
+
|
317 |
+
# (1, 2N, 1, 1)
|
318 |
+
p_n = self._get_p_n(N, dtype)
|
319 |
+
# (1, 2N, h, w)
|
320 |
+
p_0 = self._get_p_0(h, w, N, dtype)
|
321 |
+
p = p_0 + p_n + offset
|
322 |
+
return p
|
323 |
+
|
324 |
+
def _get_x_q(self, x, q, N):
|
325 |
+
b, h, w, _ = q.size()
|
326 |
+
padded_w = x.size(3)
|
327 |
+
c = x.size(1)
|
328 |
+
# (b, c, h*w)
|
329 |
+
x = x.contiguous().view(b, c, -1)
|
330 |
+
|
331 |
+
# (b, h, w, N)
|
332 |
+
index = q[..., :N]*padded_w + q[..., N:] # offset_x*w + offset_y
|
333 |
+
# (b, c, h*w*N)
|
334 |
+
index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1)
|
335 |
+
|
336 |
+
x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N)
|
337 |
+
|
338 |
+
return x_offset
|
339 |
+
|
340 |
+
@staticmethod
|
341 |
+
def _reshape_x_offset(x_offset, ks):
|
342 |
+
b, c, h, w, N = x_offset.size()
|
343 |
+
x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1)
|
344 |
+
x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks)
|
345 |
+
|
346 |
+
return x_offset
|
347 |
+
|
348 |
+
|
349 |
+
class GAP(nn.Module):
|
350 |
+
def __init__(self):
|
351 |
+
super(GAP, self).__init__()
|
352 |
+
self.avg_pool = nn.AdaptiveAvgPool2d(1)
|
353 |
+
def forward(self, x):
|
354 |
+
#b, c, _, _ = x.size()
|
355 |
+
return self.avg_pool(x)#.view(b, c)
|
356 |
+
|
357 |
+
|
358 |
+
class Silence(nn.Module):
|
359 |
+
def __init__(self):
|
360 |
+
super(Silence, self).__init__()
|
361 |
+
def forward(self, x):
|
362 |
+
return x
|
363 |
+
|
364 |
+
|
365 |
+
class ScaleChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
366 |
+
def __init__(self, layers):
|
367 |
+
super(ScaleChannel, self).__init__()
|
368 |
+
self.layers = layers # layer indices
|
369 |
+
|
370 |
+
def forward(self, x, outputs):
|
371 |
+
a = outputs[self.layers[0]]
|
372 |
+
return x.expand_as(a) * a
|
373 |
+
|
374 |
+
|
375 |
+
class ShiftChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
376 |
+
def __init__(self, layers):
|
377 |
+
super(ShiftChannel, self).__init__()
|
378 |
+
self.layers = layers # layer indices
|
379 |
+
|
380 |
+
def forward(self, x, outputs):
|
381 |
+
a = outputs[self.layers[0]]
|
382 |
+
return a.expand_as(x) + x
|
383 |
+
|
384 |
+
|
385 |
+
class ShiftChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
386 |
+
def __init__(self, layers):
|
387 |
+
super(ShiftChannel2D, self).__init__()
|
388 |
+
self.layers = layers # layer indices
|
389 |
+
|
390 |
+
def forward(self, x, outputs):
|
391 |
+
a = outputs[self.layers[0]].view(1,-1,1,1)
|
392 |
+
return a.expand_as(x) + x
|
393 |
+
|
394 |
+
|
395 |
+
class ControlChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
396 |
+
def __init__(self, layers):
|
397 |
+
super(ControlChannel, self).__init__()
|
398 |
+
self.layers = layers # layer indices
|
399 |
+
|
400 |
+
def forward(self, x, outputs):
|
401 |
+
a = outputs[self.layers[0]]
|
402 |
+
return a.expand_as(x) * x
|
403 |
+
|
404 |
+
|
405 |
+
class ControlChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
406 |
+
def __init__(self, layers):
|
407 |
+
super(ControlChannel2D, self).__init__()
|
408 |
+
self.layers = layers # layer indices
|
409 |
+
|
410 |
+
def forward(self, x, outputs):
|
411 |
+
a = outputs[self.layers[0]].view(1,-1,1,1)
|
412 |
+
return a.expand_as(x) * x
|
413 |
+
|
414 |
+
|
415 |
+
class AlternateChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
416 |
+
def __init__(self, layers):
|
417 |
+
super(AlternateChannel, self).__init__()
|
418 |
+
self.layers = layers # layer indices
|
419 |
+
|
420 |
+
def forward(self, x, outputs):
|
421 |
+
a = outputs[self.layers[0]]
|
422 |
+
return torch.cat([a.expand_as(x), x], dim=1)
|
423 |
+
|
424 |
+
|
425 |
+
class AlternateChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
426 |
+
def __init__(self, layers):
|
427 |
+
super(AlternateChannel2D, self).__init__()
|
428 |
+
self.layers = layers # layer indices
|
429 |
+
|
430 |
+
def forward(self, x, outputs):
|
431 |
+
a = outputs[self.layers[0]].view(1,-1,1,1)
|
432 |
+
return torch.cat([a.expand_as(x), x], dim=1)
|
433 |
+
|
434 |
+
|
435 |
+
class SelectChannel(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
436 |
+
def __init__(self, layers):
|
437 |
+
super(SelectChannel, self).__init__()
|
438 |
+
self.layers = layers # layer indices
|
439 |
+
|
440 |
+
def forward(self, x, outputs):
|
441 |
+
a = outputs[self.layers[0]]
|
442 |
+
return a.sigmoid().expand_as(x) * x
|
443 |
+
|
444 |
+
|
445 |
+
class SelectChannel2D(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
446 |
+
def __init__(self, layers):
|
447 |
+
super(SelectChannel2D, self).__init__()
|
448 |
+
self.layers = layers # layer indices
|
449 |
+
|
450 |
+
def forward(self, x, outputs):
|
451 |
+
a = outputs[self.layers[0]].view(1,-1,1,1)
|
452 |
+
return a.sigmoid().expand_as(x) * x
|
453 |
+
|
454 |
+
|
455 |
+
class ScaleSpatial(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
456 |
+
def __init__(self, layers):
|
457 |
+
super(ScaleSpatial, self).__init__()
|
458 |
+
self.layers = layers # layer indices
|
459 |
+
|
460 |
+
def forward(self, x, outputs):
|
461 |
+
a = outputs[self.layers[0]]
|
462 |
+
return x * a
|
463 |
+
|
464 |
+
|
465 |
+
class ImplicitA(nn.Module):
|
466 |
+
def __init__(self, channel):
|
467 |
+
super(ImplicitA, self).__init__()
|
468 |
+
self.channel = channel
|
469 |
+
self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1))
|
470 |
+
nn.init.normal_(self.implicit, std=.02)
|
471 |
+
|
472 |
+
def forward(self):
|
473 |
+
return self.implicit
|
474 |
+
|
475 |
+
|
476 |
+
class ImplicitC(nn.Module):
|
477 |
+
def __init__(self, channel):
|
478 |
+
super(ImplicitC, self).__init__()
|
479 |
+
self.channel = channel
|
480 |
+
self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1))
|
481 |
+
nn.init.normal_(self.implicit, std=.02)
|
482 |
+
|
483 |
+
def forward(self):
|
484 |
+
return self.implicit
|
485 |
+
|
486 |
+
|
487 |
+
class ImplicitM(nn.Module):
|
488 |
+
def __init__(self, channel):
|
489 |
+
super(ImplicitM, self).__init__()
|
490 |
+
self.channel = channel
|
491 |
+
self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1))
|
492 |
+
nn.init.normal_(self.implicit, mean=1., std=.02)
|
493 |
+
|
494 |
+
def forward(self):
|
495 |
+
return self.implicit
|
496 |
+
|
497 |
+
|
498 |
+
|
499 |
+
class Implicit2DA(nn.Module):
|
500 |
+
def __init__(self, atom, channel):
|
501 |
+
super(Implicit2DA, self).__init__()
|
502 |
+
self.channel = channel
|
503 |
+
self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1))
|
504 |
+
nn.init.normal_(self.implicit, std=.02)
|
505 |
+
|
506 |
+
def forward(self):
|
507 |
+
return self.implicit
|
508 |
+
|
509 |
+
|
510 |
+
class Implicit2DC(nn.Module):
|
511 |
+
def __init__(self, atom, channel):
|
512 |
+
super(Implicit2DC, self).__init__()
|
513 |
+
self.channel = channel
|
514 |
+
self.implicit = nn.Parameter(torch.zeros(1, atom, channel, 1))
|
515 |
+
nn.init.normal_(self.implicit, std=.02)
|
516 |
+
|
517 |
+
def forward(self):
|
518 |
+
return self.implicit
|
519 |
+
|
520 |
+
|
521 |
+
class Implicit2DM(nn.Module):
|
522 |
+
def __init__(self, atom, channel):
|
523 |
+
super(Implicit2DM, self).__init__()
|
524 |
+
self.channel = channel
|
525 |
+
self.implicit = nn.Parameter(torch.ones(1, atom, channel, 1))
|
526 |
+
nn.init.normal_(self.implicit, mean=1., std=.02)
|
527 |
+
|
528 |
+
def forward(self):
|
529 |
+
return self.implicit
|
530 |
+
|
531 |
+
|
532 |
+
|
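Note: the snippet below is an illustrative sketch, not part of the commit. It assumes the Reorg block defined in layers.py above is importable; Reorg is a space-to-depth rearrangement that trades spatial resolution for channels.

# Sketch only: Reorg halves height/width and quadruples the channel count.
import torch

x = torch.randn(1, 3, 416, 416)
y = Reorg()(x)
print(y.shape)  # torch.Size([1, 12, 208, 208])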
asone/detectors/yolor/utils/loss.py
ADDED
@@ -0,0 +1,173 @@
# Loss functions

import torch
import torch.nn as nn

from asone.detectors.yolor.utils.general import bbox_iou
from asone.detectors.yolor.utils.torch_utils import is_parallel


def smooth_BCE(eps=0.1):  # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
    # return positive, negative label smoothing BCE targets
    return 1.0 - 0.5 * eps, 0.5 * eps


class BCEBlurWithLogitsLoss(nn.Module):
    # BCEwithLogitLoss() with reduced missing label effects.
    def __init__(self, alpha=0.05):
        super(BCEBlurWithLogitsLoss, self).__init__()
        self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none')  # must be nn.BCEWithLogitsLoss()
        self.alpha = alpha

    def forward(self, pred, true):
        loss = self.loss_fcn(pred, true)
        pred = torch.sigmoid(pred)  # prob from logits
        dx = pred - true  # reduce only missing label effects
        # dx = (pred - true).abs()  # reduce missing label and false label effects
        alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4))
        loss *= alpha_factor
        return loss.mean()


class FocalLoss(nn.Module):
    # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super(FocalLoss, self).__init__()
        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = loss_fcn.reduction
        self.loss_fcn.reduction = 'none'  # required to apply FL to each element

    def forward(self, pred, true):
        loss = self.loss_fcn(pred, true)
        # p_t = torch.exp(-loss)
        # loss *= self.alpha * (1.000001 - p_t) ** self.gamma  # non-zero power for gradient stability

        # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
        pred_prob = torch.sigmoid(pred)  # prob from logits
        p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        modulating_factor = (1.0 - p_t) ** self.gamma
        loss *= alpha_factor * modulating_factor

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:  # 'none'
            return loss


def compute_loss(p, targets, model):  # predictions, targets, model
    device = targets.device
    #print(device)
    lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
    tcls, tbox, indices, anchors = build_targets(p, targets, model)  # targets
    h = model.hyp  # hyperparameters

    # Define criteria
    BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['cls_pw']])).to(device)
    BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['obj_pw']])).to(device)

    # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
    cp, cn = smooth_BCE(eps=0.0)

    # Focal loss
    g = h['fl_gamma']  # focal loss gamma
    if g > 0:
        BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

    # Losses
    nt = 0  # number of targets
    no = len(p)  # number of outputs
    balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1]  # P3-5 or P3-6
    balance = [4.0, 1.0, 0.5, 0.4, 0.1] if no == 5 else balance
    for i, pi in enumerate(p):  # layer index, layer predictions
        b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
        tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj

        n = b.shape[0]  # number of targets
        if n:
            nt += n  # cumulative targets
            ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

            # Regression
            pxy = ps[:, :2].sigmoid() * 2. - 0.5
            pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
            pbox = torch.cat((pxy, pwh), 1).to(device)  # predicted box
            iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
            lbox += (1.0 - iou).mean()  # iou loss

            # Objectness
            tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype)  # iou ratio

            # Classification
            if model.nc > 1:  # cls loss (only if multiple classes)
                t = torch.full_like(ps[:, 5:], cn, device=device)  # targets
                t[range(n), tcls[i]] = cp
                lcls += BCEcls(ps[:, 5:], t)  # BCE

            # Append targets to text file
            # with open('targets.txt', 'a') as file:
            #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]

        lobj += BCEobj(pi[..., 4], tobj) * balance[i]  # obj loss

    s = 3 / no  # output count scaling
    lbox *= h['box'] * s
    lobj *= h['obj'] * s * (1.4 if no >= 4 else 1.)
    lcls *= h['cls'] * s
    bs = tobj.shape[0]  # batch size

    loss = lbox + lobj + lcls
    return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()


def build_targets(p, targets, model):
    nt = targets.shape[0]  # number of anchors, targets
    tcls, tbox, indices, anch = [], [], [], []
    gain = torch.ones(6, device=targets.device)  # normalized to gridspace gain
    off = torch.tensor([[1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device).float()  # overlap offsets

    g = 0.5  # offset
    multi_gpu = is_parallel(model)
    for i, jj in enumerate(model.module.yolo_layers if multi_gpu else model.yolo_layers):
        # get number of grid points and anchor vec for this yolo layer
        anchors = model.module.module_list[jj].anchor_vec if multi_gpu else model.module_list[jj].anchor_vec
        gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain

        # Match targets to anchors
        a, t, offsets = [], targets * gain, 0
        if nt:
            na = anchors.shape[0]  # number of anchors
            at = torch.arange(na).view(na, 1).repeat(1, nt)  # anchor tensor, same as .repeat_interleave(nt)
            r = t[None, :, 4:6] / anchors[:, None]  # wh ratio
            j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t']  # compare
            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2))
            a, t = at[j], t.repeat(na, 1, 1)[j]  # filter

            # overlaps
            gxy = t[:, 2:4]  # grid xy
            z = torch.zeros_like(gxy)
            j, k = ((gxy % 1. < g) & (gxy > 1.)).T
            l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T
            a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat((t, t[j], t[k], t[l], t[m]), 0)
            offsets = torch.cat((z, z[j] + off[0], z[k] + off[1], z[l] + off[2], z[m] + off[3]), 0) * g

        # Define
        b, c = t[:, :2].long().T  # image, class
        gxy = t[:, 2:4]  # grid xy
        gwh = t[:, 4:6]  # grid wh
        gij = (gxy - offsets).long()
        gi, gj = gij.T  # grid xy indices

        # Append
        #indices.append((b, a, gj, gi))  # image, anchor, grid indices
        indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))  # image, anchor, grid indices
        tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
        anch.append(anchors[a])  # anchors
        tcls.append(c)  # class

    return tcls, tbox, indices, anch
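Note: the snippet below is an illustrative sketch, not part of the commit. It shows how the FocalLoss wrapper defined above is meant to be used around an nn.BCEWithLogitsLoss criterion; the tensor shapes are made up for the example.

# Sketch only: wrap BCE-with-logits in focal loss, as done in compute_loss() when fl_gamma > 0.
import torch
import torch.nn as nn

criterion = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5, alpha=0.25)
logits = torch.randn(8, 80)                       # raw class scores for 8 predictions, 80 classes
targets = torch.zeros(8, 80)                      # one-hot style targets
targets[torch.arange(8), torch.randint(0, 80, (8,))] = 1.0
loss = criterion(logits, targets)                 # scalar, since the wrapped loss used 'mean' reduction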
asone/detectors/yolor/utils/metrics.py
ADDED
@@ -0,0 +1,140 @@
# Model validation metrics

import matplotlib.pyplot as plt
import numpy as np


def fitness(x):
    # Model fitness as a weighted combination of metrics
    w = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    return (x[:, :4] * w).sum(1)


def fitness_p(x):
    # Model fitness as a weighted combination of metrics
    w = [1.0, 0.0, 0.0, 0.0]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    return (x[:, :4] * w).sum(1)


def fitness_r(x):
    # Model fitness as a weighted combination of metrics
    w = [0.0, 1.0, 0.0, 0.0]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    return (x[:, :4] * w).sum(1)


def fitness_ap50(x):
    # Model fitness as a weighted combination of metrics
    w = [0.0, 0.0, 1.0, 0.0]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    return (x[:, :4] * w).sum(1)


def fitness_ap(x):
    # Model fitness as a weighted combination of metrics
    w = [0.0, 0.0, 0.0, 1.0]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    return (x[:, :4] * w).sum(1)


def fitness_f(x):
    # Model fitness as a weighted combination of metrics
    #w = [0.0, 0.0, 0.0, 1.0]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    return ((x[:, 0]*x[:, 1])/(x[:, 0]+x[:, 1]))


def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, fname='precision-recall_curve.png'):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp: True positives (nparray, nx1 or nx10).
        conf: Objectness value from 0-1 (nparray).
        pred_cls: Predicted object classes (nparray).
        target_cls: True object classes (nparray).
        plot: Plot precision-recall curve at mAP@0.5
        fname: Plot filename
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes = np.unique(target_cls)

    # Create Precision-Recall curve and compute AP for each class
    px, py = np.linspace(0, 1, 1000), []  # for plotting
    pr_score = 0.1  # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
    s = [unique_classes.shape[0], tp.shape[1]]  # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
    ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s)
    for ci, c in enumerate(unique_classes):
        i = pred_cls == c
        n_l = (target_cls == c).sum()  # number of labels
        n_p = i.sum()  # number of predictions

        if n_p == 0 or n_l == 0:
            continue
        else:
            # Accumulate FPs and TPs
            fpc = (1 - tp[i]).cumsum(0)
            tpc = tp[i].cumsum(0)

            # Recall
            recall = tpc / (n_l + 1e-16)  # recall curve
            r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0])  # r at pr_score, negative x, xp because xp decreases

            # Precision
            precision = tpc / (tpc + fpc)  # precision curve
            p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0])  # p at pr_score

            # AP from recall-precision curve
            for j in range(tp.shape[1]):
                ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
                if j == 0:
                    py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5

    # Compute F1 score (harmonic mean of precision and recall)
    f1 = 2 * p * r / (p + r + 1e-16)

    if plot:
        py = np.stack(py, axis=1)
        fig, ax = plt.subplots(1, 1, figsize=(5, 5))
        ax.plot(px, py, linewidth=0.5, color='grey')  # plot(recall, precision)
        ax.plot(px, py.mean(1), linewidth=2, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
        ax.set_xlabel('Recall')
        ax.set_ylabel('Precision')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        plt.legend()
        fig.tight_layout()
        fig.savefig(fname, dpi=200)

    return p, r, ap, f1, unique_classes.astype('int32')


def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall: The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Compute the precision envelope
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    method = 'interp'  # methods: 'continuous', 'interp'
    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
        ap = np.trapz(np.interp(x, mrec, mpre), x)  # integrate
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x axis (recall) changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve

    return ap, mpre, mrec
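Note: the snippet below is an illustrative sketch, not part of the commit. It feeds a toy precision/recall curve into compute_ap() from the file above; the numbers are invented for the example.

# Sketch only: AP for a small hand-written PR curve via the 101-point interpolation path.
import numpy as np

recall = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
precision = np.array([1.0, 0.9, 0.8, 0.6, 0.5])
ap, mpre, mrec = compute_ap(recall, precision)
print('AP = %.3f' % ap)  # area under the interpolated precision envelope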
asone/detectors/yolor/utils/parse_config.py
ADDED
@@ -0,0 +1,71 @@
import os

import numpy as np


def parse_model_cfg(path):
    # Parse the yolo *.cfg file and return module definitions path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3'
    if not path.endswith('.cfg'):  # add .cfg suffix if omitted
        path += '.cfg'
    if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path):  # add cfg/ prefix if omitted
        path = 'cfg' + os.sep + path

    with open(path, 'r') as f:
        lines = f.read().split('\n')
    lines = [x for x in lines if x and not x.startswith('#')]
    lines = [x.rstrip().lstrip() for x in lines]  # get rid of fringe whitespaces
    mdefs = []  # module definitions
    for line in lines:
        if line.startswith('['):  # This marks the start of a new block
            mdefs.append({})
            mdefs[-1]['type'] = line[1:-1].rstrip()
            if mdefs[-1]['type'] == 'convolutional':
                mdefs[-1]['batch_normalize'] = 0  # pre-populate with zeros (may be overwritten later)

        else:
            key, val = line.split("=")
            key = key.rstrip()

            if key == 'anchors':  # return nparray
                mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2))  # np anchors
            elif (key in ['from', 'layers', 'mask']) or (key == 'size' and ',' in val):  # return array
                mdefs[-1][key] = [int(x) for x in val.split(',')]
            else:
                val = val.strip()
                if val.isnumeric():  # return int or float
                    mdefs[-1][key] = int(val) if (int(val) - float(val)) == 0 else float(val)
                else:
                    mdefs[-1][key] = val  # return string

    # Check all fields are supported
    supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups',
                 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random',
                 'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind',
                 'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh', 'atoms', 'na', 'nc']

    f = []  # fields
    for x in mdefs[1:]:
        [f.append(k) for k in x if k not in f]
    u = [x for x in f if x not in supported]  # unsupported fields
    assert not any(u), "Unsupported fields %s in %s. See https://github.com/ultralytics/yolov3/issues/631" % (u, path)

    return mdefs


def parse_data_cfg(path):
    # Parses the data configuration file
    if not os.path.exists(path) and os.path.exists('data' + os.sep + path):  # add data/ prefix if omitted
        path = 'data' + os.sep + path

    with open(path, 'r') as f:
        lines = f.readlines()

    options = dict()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        key, val = line.split('=')
        options[key.strip()] = val.strip()

    return options
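Note: the snippet below is an illustrative sketch, not part of the commit. It shows what parse_data_cfg() above returns for a simple key=value data file; the file name and its contents are hypothetical.

# Sketch only: parse_data_cfg() keeps every value as a string keyed by the name before '='.
# Hypothetical file 'data/coco.data' containing, e.g.:
#   classes=80
#   names=data/coco.names
opt = parse_data_cfg('data/coco.data')
print(opt['classes'], opt['names'])  # '80' 'data/coco.names'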
asone/detectors/yolor/utils/plots.py
ADDED
@@ -0,0 +1,380 @@
1 |
+
# Plotting utils
|
2 |
+
|
3 |
+
import glob
|
4 |
+
import math
|
5 |
+
import os
|
6 |
+
import random
|
7 |
+
from copy import copy
|
8 |
+
from pathlib import Path
|
9 |
+
|
10 |
+
import cv2
|
11 |
+
import matplotlib
|
12 |
+
import matplotlib.pyplot as plt
|
13 |
+
import numpy as np
|
14 |
+
import torch
|
15 |
+
import yaml
|
16 |
+
from PIL import Image
|
17 |
+
from scipy.signal import butter, filtfilt
|
18 |
+
|
19 |
+
from asone.detectors.yolor.utils.general import xywh2xyxy, xyxy2xywh
|
20 |
+
from asone.detectors.yolor.utils.metrics import fitness
|
21 |
+
|
22 |
+
# Settings
|
23 |
+
matplotlib.use('Agg') # for writing to files only
|
24 |
+
|
25 |
+
|
26 |
+
def color_list():
|
27 |
+
# Return first 10 plt colors as (r,g,b) https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb
|
28 |
+
def hex2rgb(h):
|
29 |
+
return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))
|
30 |
+
|
31 |
+
return [hex2rgb(h) for h in plt.rcParams['axes.prop_cycle'].by_key()['color']]
|
32 |
+
|
33 |
+
|
34 |
+
def hist2d(x, y, n=100):
|
35 |
+
# 2d histogram used in labels.png and evolve.png
|
36 |
+
xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n)
|
37 |
+
hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges))
|
38 |
+
xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1)
|
39 |
+
yidx = np.clip(np.digitize(y, yedges) - 1, 0, hist.shape[1] - 1)
|
40 |
+
return np.log(hist[xidx, yidx])
|
41 |
+
|
42 |
+
|
43 |
+
def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
|
44 |
+
# https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy
|
45 |
+
def butter_lowpass(cutoff, fs, order):
|
46 |
+
nyq = 0.5 * fs
|
47 |
+
normal_cutoff = cutoff / nyq
|
48 |
+
return butter(order, normal_cutoff, btype='low', analog=False)
|
49 |
+
|
50 |
+
b, a = butter_lowpass(cutoff, fs, order=order)
|
51 |
+
return filtfilt(b, a, data) # forward-backward filter
|
52 |
+
|
53 |
+
|
54 |
+
def plot_one_box(x, img, color=None, label=None, line_thickness=None):
|
55 |
+
# Plots one bounding box on image img
|
56 |
+
tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness
|
57 |
+
color = color or [random.randint(0, 255) for _ in range(3)]
|
58 |
+
c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
|
59 |
+
cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
|
60 |
+
if label:
|
61 |
+
tf = max(tl - 1, 1) # font thickness
|
62 |
+
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
|
63 |
+
c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
|
64 |
+
cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled
|
65 |
+
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
|
66 |
+
|
67 |
+
|
68 |
+
def plot_wh_methods(): # from utils.general import *; plot_wh_methods()
|
69 |
+
# Compares the two methods for width-height anchor multiplication
|
70 |
+
# https://github.com/ultralytics/yolov3/issues/168
|
71 |
+
x = np.arange(-4.0, 4.0, .1)
|
72 |
+
ya = np.exp(x)
|
73 |
+
yb = torch.sigmoid(torch.from_numpy(x)).numpy() * 2
|
74 |
+
|
75 |
+
fig = plt.figure(figsize=(6, 3), dpi=150)
|
76 |
+
plt.plot(x, ya, '.-', label='YOLO')
|
77 |
+
plt.plot(x, yb ** 2, '.-', label='YOLO ^2')
|
78 |
+
plt.plot(x, yb ** 1.6, '.-', label='YOLO ^1.6')
|
79 |
+
plt.xlim(left=-4, right=4)
|
80 |
+
plt.ylim(bottom=0, top=6)
|
81 |
+
plt.xlabel('input')
|
82 |
+
plt.ylabel('output')
|
83 |
+
plt.grid()
|
84 |
+
plt.legend()
|
85 |
+
fig.tight_layout()
|
86 |
+
fig.savefig('comparison.png', dpi=200)
|
87 |
+
|
88 |
+
|
89 |
+
def output_to_target(output, width, height):
|
90 |
+
# Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
|
91 |
+
if isinstance(output, torch.Tensor):
|
92 |
+
output = output.cpu().numpy()
|
93 |
+
|
94 |
+
targets = []
|
95 |
+
for i, o in enumerate(output):
|
96 |
+
if o is not None:
|
97 |
+
for pred in o:
|
98 |
+
box = pred[:4]
|
99 |
+
w = (box[2] - box[0]) / width
|
100 |
+
h = (box[3] - box[1]) / height
|
101 |
+
x = box[0] / width + w / 2
|
102 |
+
y = box[1] / height + h / 2
|
103 |
+
conf = pred[4]
|
104 |
+
cls = int(pred[5])
|
105 |
+
|
106 |
+
targets.append([i, cls, x, y, w, h, conf])
|
107 |
+
|
108 |
+
return np.array(targets)
|
109 |
+
|
110 |
+
|
111 |
+
def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
|
112 |
+
# Plot image grid with labels
|
113 |
+
|
114 |
+
if isinstance(images, torch.Tensor):
|
115 |
+
images = images.cpu().float().numpy()
|
116 |
+
if isinstance(targets, torch.Tensor):
|
117 |
+
targets = targets.cpu().numpy()
|
118 |
+
|
119 |
+
# un-normalise
|
120 |
+
if np.max(images[0]) <= 1:
|
121 |
+
images *= 255
|
122 |
+
|
123 |
+
tl = 3 # line thickness
|
124 |
+
tf = max(tl - 1, 1) # font thickness
|
125 |
+
bs, _, h, w = images.shape # batch size, _, height, width
|
126 |
+
bs = min(bs, max_subplots) # limit plot images
|
127 |
+
ns = np.ceil(bs ** 0.5) # number of subplots (square)
|
128 |
+
|
129 |
+
# Check if we should resize
|
130 |
+
scale_factor = max_size / max(h, w)
|
131 |
+
if scale_factor < 1:
|
132 |
+
h = math.ceil(scale_factor * h)
|
133 |
+
w = math.ceil(scale_factor * w)
|
134 |
+
|
135 |
+
colors = color_list() # list of colors
|
136 |
+
mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init
|
137 |
+
for i, img in enumerate(images):
|
138 |
+
if i == max_subplots: # if last batch has fewer images than we expect
|
139 |
+
break
|
140 |
+
|
141 |
+
block_x = int(w * (i // ns))
|
142 |
+
block_y = int(h * (i % ns))
|
143 |
+
|
144 |
+
img = img.transpose(1, 2, 0)
|
145 |
+
if scale_factor < 1:
|
146 |
+
img = cv2.resize(img, (w, h))
|
147 |
+
|
148 |
+
mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
|
149 |
+
if len(targets) > 0:
|
150 |
+
image_targets = targets[targets[:, 0] == i]
|
151 |
+
boxes = xywh2xyxy(image_targets[:, 2:6]).T
|
152 |
+
classes = image_targets[:, 1].astype('int')
|
153 |
+
labels = image_targets.shape[1] == 6 # labels if no conf column
|
154 |
+
conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred)
|
155 |
+
|
156 |
+
boxes[[0, 2]] *= w
|
157 |
+
boxes[[0, 2]] += block_x
|
158 |
+
boxes[[1, 3]] *= h
|
159 |
+
boxes[[1, 3]] += block_y
|
160 |
+
for j, box in enumerate(boxes.T):
|
161 |
+
cls = int(classes[j])
|
162 |
+
color = colors[cls % len(colors)]
|
163 |
+
cls = names[cls] if names else cls
|
164 |
+
if labels or conf[j] > 0.25: # 0.25 conf thresh
|
165 |
+
label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j])
|
166 |
+
plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)
|
167 |
+
|
168 |
+
# Draw image filename labels
|
169 |
+
if paths:
|
170 |
+
label = Path(paths[i]).name[:40] # trim to 40 char
|
171 |
+
t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
|
172 |
+
cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf,
|
173 |
+
lineType=cv2.LINE_AA)
|
174 |
+
|
175 |
+
# Image border
|
176 |
+
cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)
|
177 |
+
|
178 |
+
if fname:
|
179 |
+
r = min(1280. / max(h, w) / ns, 1.0) # ratio to limit image size
|
180 |
+
mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA)
|
181 |
+
# cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save
|
182 |
+
Image.fromarray(mosaic).save(fname) # PIL save
|
183 |
+
return mosaic
|
184 |
+
|
185 |
+
|
186 |
+
def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
|
187 |
+
# Plot LR simulating training for full epochs
|
188 |
+
optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals
|
189 |
+
y = []
|
190 |
+
for _ in range(epochs):
|
191 |
+
scheduler.step()
|
192 |
+
y.append(optimizer.param_groups[0]['lr'])
|
193 |
+
plt.plot(y, '.-', label='LR')
|
194 |
+
plt.xlabel('epoch')
|
195 |
+
plt.ylabel('LR')
|
196 |
+
plt.grid()
|
197 |
+
plt.xlim(0, epochs)
|
198 |
+
plt.ylim(0)
|
199 |
+
plt.tight_layout()
|
200 |
+
plt.savefig(Path(save_dir) / 'LR.png', dpi=200)
|
201 |
+
|
202 |
+
|
203 |
+
def plot_test_txt(): # from utils.general import *; plot_test()
|
204 |
+
# Plot test.txt histograms
|
205 |
+
x = np.loadtxt('test.txt', dtype=np.float32)
|
206 |
+
box = xyxy2xywh(x[:, :4])
|
207 |
+
cx, cy = box[:, 0], box[:, 1]
|
208 |
+
|
209 |
+
fig, ax = plt.subplots(1, 1, figsize=(6, 6), tight_layout=True)
|
210 |
+
ax.hist2d(cx, cy, bins=600, cmax=10, cmin=0)
|
211 |
+
ax.set_aspect('equal')
|
212 |
+
plt.savefig('hist2d.png', dpi=300)
|
213 |
+
|
214 |
+
fig, ax = plt.subplots(1, 2, figsize=(12, 6), tight_layout=True)
|
215 |
+
ax[0].hist(cx, bins=600)
|
216 |
+
ax[1].hist(cy, bins=600)
|
217 |
+
plt.savefig('hist1d.png', dpi=200)
|
218 |
+
|
219 |
+
|
220 |
+
def plot_targets_txt(): # from utils.general import *; plot_targets_txt()
|
221 |
+
# Plot targets.txt histograms
|
222 |
+
x = np.loadtxt('targets.txt', dtype=np.float32).T
|
223 |
+
s = ['x targets', 'y targets', 'width targets', 'height targets']
|
224 |
+
fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)
|
225 |
+
ax = ax.ravel()
|
226 |
+
for i in range(4):
|
227 |
+
ax[i].hist(x[i], bins=100, label='%.3g +/- %.3g' % (x[i].mean(), x[i].std()))
|
228 |
+
ax[i].legend()
|
229 |
+
ax[i].set_title(s[i])
|
230 |
+
plt.savefig('targets.jpg', dpi=200)
|
231 |
+
|
232 |
+
|
233 |
+
def plot_study_txt(f='study.txt', x=None): # from utils.general import *; plot_study_txt()
|
234 |
+
# Plot study.txt generated by test.py
|
235 |
+
fig, ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)
|
236 |
+
ax = ax.ravel()
|
237 |
+
|
238 |
+
fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True)
|
239 |
+
for f in ['study/study_coco_yolo%s.txt' % x for x in ['s', 'm', 'l', 'x']]:
|
240 |
+
y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T
|
241 |
+
x = np.arange(y.shape[1]) if x is None else np.array(x)
|
242 |
+
s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_inference (ms/img)', 't_NMS (ms/img)', 't_total (ms/img)']
|
243 |
+
for i in range(7):
|
244 |
+
ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8)
|
245 |
+
ax[i].set_title(s[i])
|
246 |
+
|
247 |
+
j = y[3].argmax() + 1
|
248 |
+
ax2.plot(y[6, :j], y[3, :j] * 1E2, '.-', linewidth=2, markersize=8,
|
249 |
+
label=Path(f).stem.replace('study_coco_', '').replace('yolo', 'YOLO'))
|
250 |
+
|
251 |
+
ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5],
|
252 |
+
'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet')
|
253 |
+
|
254 |
+
ax2.grid()
|
255 |
+
ax2.set_xlim(0, 30)
|
256 |
+
ax2.set_ylim(28, 50)
|
257 |
+
ax2.set_yticks(np.arange(30, 55, 5))
|
258 |
+
ax2.set_xlabel('GPU Speed (ms/img)')
|
259 |
+
ax2.set_ylabel('COCO AP val')
|
260 |
+
ax2.legend(loc='lower right')
|
261 |
+
plt.savefig('study_mAP_latency.png', dpi=300)
|
262 |
+
plt.savefig(f.replace('.txt', '.png'), dpi=300)
|
263 |
+
|
264 |
+
|
265 |
+
def plot_labels(labels, save_dir=''):
|
266 |
+
# plot dataset labels
|
267 |
+
c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes
|
268 |
+
nc = int(c.max() + 1) # number of classes
|
269 |
+
|
270 |
+
fig, ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)
|
271 |
+
ax = ax.ravel()
|
272 |
+
ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
|
273 |
+
ax[0].set_xlabel('classes')
|
274 |
+
ax[1].scatter(b[0], b[1], c=hist2d(b[0], b[1], 90), cmap='jet')
|
275 |
+
ax[1].set_xlabel('x')
|
276 |
+
ax[1].set_ylabel('y')
|
277 |
+
ax[2].scatter(b[2], b[3], c=hist2d(b[2], b[3], 90), cmap='jet')
|
278 |
+
ax[2].set_xlabel('width')
|
279 |
+
ax[2].set_ylabel('height')
|
280 |
+
plt.savefig(Path(save_dir) / 'labels.png', dpi=200)
|
281 |
+
plt.close()
|
282 |
+
|
283 |
+
# seaborn correlogram
|
284 |
+
try:
|
285 |
+
import seaborn as sns
|
286 |
+
import pandas as pd
|
287 |
+
x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])
|
288 |
+
sns.pairplot(x, corner=True, diag_kind='hist', kind='scatter', markers='o',
|
289 |
+
plot_kws=dict(s=3, edgecolor=None, linewidth=1, alpha=0.02),
|
290 |
+
diag_kws=dict(bins=50))
|
291 |
+
plt.savefig(Path(save_dir) / 'labels_correlogram.png', dpi=200)
|
292 |
+
plt.close()
|
293 |
+
except Exception as e:
|
294 |
+
pass
|
295 |
+
|
296 |
+
|
297 |
+
def plot_evolution(yaml_file='data/hyp.finetune.yaml'): # from utils.general import *; plot_evolution()
|
298 |
+
# Plot hyperparameter evolution results in evolve.txt
|
299 |
+
with open(yaml_file) as f:
|
300 |
+
hyp = yaml.load(f, Loader=yaml.FullLoader)
|
301 |
+
x = np.loadtxt('evolve.txt', ndmin=2)
|
302 |
+
f = fitness(x)
|
303 |
+
# weights = (f - f.min()) ** 2 # for weighted results
|
304 |
+
plt.figure(figsize=(10, 12), tight_layout=True)
|
305 |
+
matplotlib.rc('font', **{'size': 8})
|
306 |
+
for i, (k, v) in enumerate(hyp.items()):
|
307 |
+
y = x[:, i + 7]
|
308 |
+
# mu = (y * weights).sum() / weights.sum() # best weighted result
|
309 |
+
mu = y[f.argmax()] # best single result
|
310 |
+
plt.subplot(6, 5, i + 1)
|
311 |
+
plt.scatter(y, f, c=hist2d(y, f, 20), cmap='viridis', alpha=.8, edgecolors='none')
|
312 |
+
plt.plot(mu, f.max(), 'k+', markersize=15)
|
313 |
+
plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9})
|
314 |
+
if i % 5 != 0:
|
315 |
+
plt.yticks([])
|
316 |
+
print('%15s: %.3g' % (k, mu))
|
317 |
+
plt.savefig('evolve.png', dpi=200)
|
318 |
+
print('\nPlot saved as evolve.png')
|
319 |
+
|
320 |
+
|
321 |
+
def plot_results_overlay(start=0, stop=0): # from utils.general import *; plot_results_overlay()
|
322 |
+
# Plot training 'results*.txt', overlaying train and val losses
|
323 |
+
s = ['train', 'train', 'train', 'Precision', 'mAP@0.5', 'val', 'val', 'val', 'Recall', 'mAP@0.5:0.95'] # legends
|
324 |
+
t = ['Box', 'Objectness', 'Classification', 'P-R', 'mAP-F1'] # titles
|
325 |
+
for f in sorted(glob.glob('results*.txt') + glob.glob('../../Downloads/results*.txt')):
|
326 |
+
results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T
|
327 |
+
n = results.shape[1] # number of rows
|
328 |
+
x = range(start, min(stop, n) if stop else n)
|
329 |
+
fig, ax = plt.subplots(1, 5, figsize=(14, 3.5), tight_layout=True)
|
330 |
+
ax = ax.ravel()
|
331 |
+
for i in range(5):
|
332 |
+
for j in [i, i + 5]:
|
333 |
+
y = results[j, x]
|
334 |
+
ax[i].plot(x, y, marker='.', label=s[j])
|
335 |
+
# y_smooth = butter_lowpass_filtfilt(y)
|
336 |
+
# ax[i].plot(x, np.gradient(y_smooth), marker='.', label=s[j])
|
337 |
+
|
338 |
+
ax[i].set_title(t[i])
|
339 |
+
ax[i].legend()
|
340 |
+
ax[i].set_ylabel(f) if i == 0 else None # add filename
|
341 |
+
fig.savefig(f.replace('.txt', '.png'), dpi=200)
|
342 |
+
|
343 |
+
|
344 |
+
def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''):
|
345 |
+
# from utils.general import *; plot_results(save_dir='runs/train/exp0')
|
346 |
+
# Plot training 'results*.txt'
|
347 |
+
fig, ax = plt.subplots(2, 5, figsize=(12, 6))
|
348 |
+
ax = ax.ravel()
|
349 |
+
s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall',
|
350 |
+
'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95']
|
351 |
+
if bucket:
|
352 |
+
# os.system('rm -rf storage.googleapis.com')
|
353 |
+
# files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id]
|
354 |
+
files = ['%g.txt' % x for x in id]
|
355 |
+
c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/%g.txt' % (bucket, x) for x in id)
|
356 |
+
os.system(c)
|
357 |
+
else:
|
358 |
+
files = glob.glob(str(Path(save_dir) / '*.txt')) + glob.glob('../../Downloads/results*.txt')
|
359 |
+
assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir)
|
360 |
+
for fi, f in enumerate(files):
|
361 |
+
try:
|
362 |
+
results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T
|
363 |
+
n = results.shape[1] # number of rows
|
364 |
+
x = range(start, min(stop, n) if stop else n)
|
365 |
+
for i in range(10):
|
366 |
+
y = results[i, x]
|
367 |
+
if i in [0, 1, 2, 5, 6, 7]:
|
368 |
+
y[y == 0] = np.nan # don't show zero loss values
|
369 |
+
# y /= y[0] # normalize
|
370 |
+
label = labels[fi] if len(labels) else Path(f).stem
|
371 |
+
ax[i].plot(x, y, marker='.', label=label, linewidth=1, markersize=6)
|
372 |
+
ax[i].set_title(s[i])
|
373 |
+
# if i in [5, 6, 7]: # share train and val loss y axes
|
374 |
+
# ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
|
375 |
+
except Exception as e:
|
376 |
+
print('Warning: Plotting error for %s; %s' % (f, e))
|
377 |
+
|
378 |
+
fig.tight_layout()
|
379 |
+
ax[1].legend()
|
380 |
+
fig.savefig(Path(save_dir) / 'results.png', dpi=200)
|
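A minimal usage sketch (not part of the commit) showing how output_to_target() and plot_images() above can be chained to visualize a batch of detections; imgs (the NCHW float tensor fed to the network), dets (the per-image (n, 6) [x1, y1, x2, y2, conf, cls] arrays produced by NMS) and names (the class-name list) are assumed to exist already.

    # Illustrative sketch only; `imgs`, `dets` and `names` are placeholders.
    _, _, h, w = imgs.shape
    targets = output_to_target(dets, w, h)        # [batch_id, cls, x, y, w, h, conf], normalized
    mosaic = plot_images(imgs, targets, fname='pred_mosaic.jpg', names=names)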
asone/detectors/yolor/utils/torch_utils.py
ADDED
@@ -0,0 +1,240 @@
1 |
+
# PyTorch utils
|
2 |
+
|
3 |
+
import logging
|
4 |
+
import math
|
5 |
+
import os
|
6 |
+
import time
|
7 |
+
from contextlib import contextmanager
|
8 |
+
from copy import deepcopy
|
9 |
+
|
10 |
+
import torch
|
11 |
+
import torch.backends.cudnn as cudnn
|
12 |
+
import torch.nn as nn
|
13 |
+
import torch.nn.functional as F
|
14 |
+
import torchvision
|
15 |
+
|
16 |
+
logger = logging.getLogger(__name__)
|
17 |
+
|
18 |
+
|
19 |
+
@contextmanager
|
20 |
+
def torch_distributed_zero_first(local_rank: int):
|
21 |
+
"""
|
22 |
+
Context manager to make all processes in distributed training wait for the local master to do something.
|
23 |
+
"""
|
24 |
+
if local_rank not in [-1, 0]:
|
25 |
+
torch.distributed.barrier()
|
26 |
+
yield
|
27 |
+
if local_rank == 0:
|
28 |
+
torch.distributed.barrier()
|
29 |
+
|
30 |
+
|
31 |
+
def init_torch_seeds(seed=0):
|
32 |
+
# Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html
|
33 |
+
torch.manual_seed(seed)
|
34 |
+
if seed == 0: # slower, more reproducible
|
35 |
+
cudnn.deterministic = True
|
36 |
+
cudnn.benchmark = False
|
37 |
+
else: # faster, less reproducible
|
38 |
+
cudnn.deterministic = False
|
39 |
+
cudnn.benchmark = True
|
40 |
+
|
41 |
+
|
42 |
+
def select_device(device='', batch_size=None):
|
43 |
+
# device = 'cpu' or '0' or '0,1,2,3'
|
44 |
+
cpu_request = device.lower() == 'cpu'
|
45 |
+
if device and not cpu_request: # if device requested other than 'cpu'
|
46 |
+
os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
|
47 |
+
assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availability
|
48 |
+
|
49 |
+
cuda = False if cpu_request else torch.cuda.is_available()
|
50 |
+
if cuda:
|
51 |
+
c = 1024 ** 2 # bytes to MB
|
52 |
+
ng = torch.cuda.device_count()
|
53 |
+
if ng > 1 and batch_size: # check that batch_size is compatible with device_count
|
54 |
+
assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng)
|
55 |
+
x = [torch.cuda.get_device_properties(i) for i in range(ng)]
|
56 |
+
s = f'Using torch {torch.__version__} '
|
57 |
+
for i in range(0, ng):
|
58 |
+
if i == 1:
|
59 |
+
s = ' ' * len(s)
|
60 |
+
logger.info("%sCUDA:%g (%s, %dMB)" % (s, i, x[i].name, x[i].total_memory / c))
|
61 |
+
else:
|
62 |
+
logger.info(f'Using torch {torch.__version__} CPU')
|
63 |
+
|
64 |
+
logger.info('') # skip a line
|
65 |
+
return torch.device('cuda:0' if cuda else 'cpu')
|
66 |
+
|
67 |
+
|
68 |
+
def time_synchronized():
|
69 |
+
torch.cuda.synchronize() if torch.cuda.is_available() else None
|
70 |
+
return time.time()
|
71 |
+
|
72 |
+
|
73 |
+
def is_parallel(model):
|
74 |
+
return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
|
75 |
+
|
76 |
+
|
77 |
+
def intersect_dicts(da, db, exclude=()):
|
78 |
+
# Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
|
79 |
+
return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
|
80 |
+
|
81 |
+
|
82 |
+
def initialize_weights(model):
|
83 |
+
for m in model.modules():
|
84 |
+
t = type(m)
|
85 |
+
if t is nn.Conv2d:
|
86 |
+
pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
|
87 |
+
elif t is nn.BatchNorm2d:
|
88 |
+
m.eps = 1e-3
|
89 |
+
m.momentum = 0.03
|
90 |
+
elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
|
91 |
+
m.inplace = True
|
92 |
+
|
93 |
+
|
94 |
+
def find_modules(model, mclass=nn.Conv2d):
|
95 |
+
# Finds layer indices matching module class 'mclass'
|
96 |
+
return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
|
97 |
+
|
98 |
+
|
99 |
+
def sparsity(model):
|
100 |
+
# Return global model sparsity
|
101 |
+
a, b = 0., 0.
|
102 |
+
for p in model.parameters():
|
103 |
+
a += p.numel()
|
104 |
+
b += (p == 0).sum()
|
105 |
+
return b / a
|
106 |
+
|
107 |
+
|
108 |
+
def prune(model, amount=0.3):
|
109 |
+
# Prune model to requested global sparsity
|
110 |
+
import torch.nn.utils.prune as prune
|
111 |
+
print('Pruning model... ', end='')
|
112 |
+
for name, m in model.named_modules():
|
113 |
+
if isinstance(m, nn.Conv2d):
|
114 |
+
prune.l1_unstructured(m, name='weight', amount=amount) # prune
|
115 |
+
prune.remove(m, 'weight') # make permanent
|
116 |
+
print(' %.3g global sparsity' % sparsity(model))
|
117 |
+
|
118 |
+
|
119 |
+
def fuse_conv_and_bn(conv, bn):
|
120 |
+
# Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
|
121 |
+
fusedconv = nn.Conv2d(conv.in_channels,
|
122 |
+
conv.out_channels,
|
123 |
+
kernel_size=conv.kernel_size,
|
124 |
+
stride=conv.stride,
|
125 |
+
padding=conv.padding,
|
126 |
+
groups=conv.groups,
|
127 |
+
bias=True).requires_grad_(False).to(conv.weight.device)
|
128 |
+
|
129 |
+
# prepare filters
|
130 |
+
w_conv = conv.weight.clone().view(conv.out_channels, -1)
|
131 |
+
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
|
132 |
+
fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
|
133 |
+
|
134 |
+
# prepare spatial bias
|
135 |
+
b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
|
136 |
+
b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
|
137 |
+
fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
|
138 |
+
|
139 |
+
return fusedconv
|
140 |
+
|
141 |
+
|
142 |
+
def model_info(model, verbose=False, img_size=640):
|
143 |
+
# Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
|
144 |
+
n_p = sum(x.numel() for x in model.parameters()) # number parameters
|
145 |
+
n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
|
146 |
+
if verbose:
|
147 |
+
print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
|
148 |
+
for i, (name, p) in enumerate(model.named_parameters()):
|
149 |
+
name = name.replace('module_list.', '')
|
150 |
+
print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
|
151 |
+
(i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
|
152 |
+
|
153 |
+
try: # FLOPS
|
154 |
+
from thop import profile
|
155 |
+
flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, img_size, img_size),), verbose=False)[0] / 1E9 * 2
|
156 |
+
img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float
|
157 |
+
fs = ', %.9f GFLOPS' % (flops) # 640x640 FLOPS
|
158 |
+
except Exception:  # thop missing or profiling failed
|
159 |
+
fs = ''
|
160 |
+
|
161 |
+
logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
|
162 |
+
|
163 |
+
|
164 |
+
def load_classifier(name='resnet101', n=2):
|
165 |
+
# Loads a pretrained model reshaped to n-class output
|
166 |
+
model = torchvision.models.__dict__[name](pretrained=True)
|
167 |
+
|
168 |
+
# ResNet model properties
|
169 |
+
# input_size = [3, 224, 224]
|
170 |
+
# input_space = 'RGB'
|
171 |
+
# input_range = [0, 1]
|
172 |
+
# mean = [0.485, 0.456, 0.406]
|
173 |
+
# std = [0.229, 0.224, 0.225]
|
174 |
+
|
175 |
+
# Reshape output to n classes
|
176 |
+
filters = model.fc.weight.shape[1]
|
177 |
+
model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
|
178 |
+
model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
|
179 |
+
model.fc.out_features = n
|
180 |
+
return model
|
181 |
+
|
182 |
+
|
183 |
+
def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio
|
184 |
+
# scales img(bs,3,y,x) by ratio
|
185 |
+
if ratio == 1.0:
|
186 |
+
return img
|
187 |
+
else:
|
188 |
+
h, w = img.shape[2:]
|
189 |
+
s = (int(h * ratio), int(w * ratio)) # new size
|
190 |
+
img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize
|
191 |
+
if not same_shape: # pad/crop img
|
192 |
+
gs = 32 # (pixels) grid size
|
193 |
+
h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
|
194 |
+
return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean
|
195 |
+
|
196 |
+
|
197 |
+
def copy_attr(a, b, include=(), exclude=()):
|
198 |
+
# Copy attributes from b to a, options to only include [...] and to exclude [...]
|
199 |
+
for k, v in b.__dict__.items():
|
200 |
+
if (len(include) and k not in include) or k.startswith('_') or k in exclude:
|
201 |
+
continue
|
202 |
+
else:
|
203 |
+
setattr(a, k, v)
|
204 |
+
|
205 |
+
|
206 |
+
class ModelEMA:
|
207 |
+
""" Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
|
208 |
+
Keep a moving average of everything in the model state_dict (parameters and buffers).
|
209 |
+
This is intended to allow functionality like
|
210 |
+
https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
|
211 |
+
A smoothed version of the weights is necessary for some training schemes to perform well.
|
212 |
+
This class is sensitive where it is initialized in the sequence of model init,
|
213 |
+
GPU assignment and distributed training wrappers.
|
214 |
+
"""
|
215 |
+
|
216 |
+
def __init__(self, model, decay=0.9999, updates=0):
|
217 |
+
# Create EMA
|
218 |
+
self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA
|
219 |
+
# if next(model.parameters()).device.type != 'cpu':
|
220 |
+
# self.ema.half() # FP16 EMA
|
221 |
+
self.updates = updates # number of EMA updates
|
222 |
+
self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs)
|
223 |
+
for p in self.ema.parameters():
|
224 |
+
p.requires_grad_(False)
|
225 |
+
|
226 |
+
def update(self, model):
|
227 |
+
# Update EMA parameters
|
228 |
+
with torch.no_grad():
|
229 |
+
self.updates += 1
|
230 |
+
d = self.decay(self.updates)
|
231 |
+
|
232 |
+
msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict
|
233 |
+
for k, v in self.ema.state_dict().items():
|
234 |
+
if v.dtype.is_floating_point:
|
235 |
+
v *= d
|
236 |
+
v += (1. - d) * msd[k].detach()
|
237 |
+
|
238 |
+
def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
|
239 |
+
# Update EMA attributes
|
240 |
+
copy_attr(self.ema, model, include, exclude)
|
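A short training-loop sketch (illustrative only, under the assumption that model, optimizer, loader, epochs and a compute_loss() helper already exist) of how ModelEMA above is typically driven:

    ema = ModelEMA(model)                    # FP32 copy with gradients frozen
    for epoch in range(epochs):
        for imgs, targets in loader:
            loss = compute_loss(model(imgs), targets)   # placeholder loss function
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            ema.update(model)                # blend current weights into the moving average
        ema.update_attr(model)               # copy non-tensor attributes onto the EMA copy
    # evaluate and checkpoint ema.ema rather than model for smoother weights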
asone/detectors/yolor/utils/yolor_utils.py
ADDED
@@ -0,0 +1,206 @@
1 |
+
import torch
|
2 |
+
import torchvision
|
3 |
+
import time
|
4 |
+
import numpy as np
|
5 |
+
import cv2
|
6 |
+
|
7 |
+
|
8 |
+
class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
|
9 |
+
|
10 |
+
# Create a list of colors for each class where each color is a tuple of 3 integer values
|
11 |
+
rng = np.random.default_rng(3)
|
12 |
+
colors = rng.uniform(0, 255, size=(len(class_names), 3))
|
13 |
+
|
14 |
+
def box_area(box):
|
15 |
+
# box = xyxy(4,n)
|
16 |
+
return (box[2] - box[0]) * (box[3] - box[1])
|
17 |
+
|
18 |
+
|
19 |
+
def box_iou(box1, box2, eps=1e-7):
|
20 |
+
# https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
|
21 |
+
"""
|
22 |
+
Return intersection-over-union (Jaccard index) of boxes.
|
23 |
+
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
|
24 |
+
Arguments:
|
25 |
+
box1 (Tensor[N, 4])
|
26 |
+
box2 (Tensor[M, 4])
|
27 |
+
Returns:
|
28 |
+
iou (Tensor[N, M]): the NxM matrix containing the pairwise
|
29 |
+
IoU values for every element in boxes1 and boxes2
|
30 |
+
"""
|
31 |
+
|
32 |
+
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
|
33 |
+
(a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1)
|
34 |
+
inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
|
35 |
+
|
36 |
+
# IoU = inter / (area1 + area2 - inter)
|
37 |
+
return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps)
|
38 |
+
|
39 |
+
def xywh2xyxy(x):
|
40 |
+
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
|
41 |
+
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
42 |
+
y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
|
43 |
+
y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
|
44 |
+
y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
|
45 |
+
y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
|
46 |
+
return y
|
47 |
+
|
48 |
+
def non_max_suppression(prediction,
|
49 |
+
conf_thres=0.25,
|
50 |
+
iou_thres=0.45,
|
51 |
+
classes=None,
|
52 |
+
agnostic=False,
|
53 |
+
multi_label=False,
|
54 |
+
labels=(),
|
55 |
+
max_det=300):
|
56 |
+
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes
|
57 |
+
Returns:
|
58 |
+
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
|
59 |
+
"""
|
60 |
+
# prediction = torch.Tensor(prediction)
|
61 |
+
bs = prediction.shape[0] # batch size
|
62 |
+
nc = prediction.shape[2] - 5 # number of classes
|
63 |
+
xc = prediction[..., 4] > conf_thres # candidates
|
64 |
+
# Checks
|
65 |
+
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
|
66 |
+
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
|
67 |
+
|
68 |
+
# Settings
|
69 |
+
# min_wh = 2 # (pixels) minimum box width and height
|
70 |
+
max_wh = 7680 # (pixels) maximum box width and height
|
71 |
+
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
|
72 |
+
time_limit = 0.3 + 0.03 * bs # seconds to quit after
|
73 |
+
redundant = True # require redundant detections
|
74 |
+
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
|
75 |
+
merge = False # use merge-NMS
|
76 |
+
|
77 |
+
t = time.time()
|
78 |
+
output = [torch.zeros((0, 6), device=prediction.device)] * bs
|
79 |
+
for xi, x in enumerate(prediction): # image index, image inference
|
80 |
+
# Apply constraints
|
81 |
+
# x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
|
82 |
+
x = x[xc[xi]] # confidence
|
83 |
+
|
84 |
+
# Cat apriori labels if autolabelling
|
85 |
+
if labels and len(labels[xi]):
|
86 |
+
lb = labels[xi]
|
87 |
+
v = torch.zeros((len(lb), nc + 5), device=x.device)
|
88 |
+
v[:, :4] = lb[:, 1:5] # box
|
89 |
+
v[:, 4] = 1.0 # conf
|
90 |
+
v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls
|
91 |
+
x = torch.cat((x, v), 0)
|
92 |
+
|
93 |
+
# If none remain process next image
|
94 |
+
if not x.shape[0]:
|
95 |
+
continue
|
96 |
+
|
97 |
+
# Compute conf
|
98 |
+
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
|
99 |
+
|
100 |
+
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
|
101 |
+
# print(type(x))
|
102 |
+
box = xywh2xyxy(x[:, :4])
|
103 |
+
|
104 |
+
# Detections matrix nx6 (xyxy, conf, cls)
|
105 |
+
if multi_label:
|
106 |
+
i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
|
107 |
+
x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
|
108 |
+
else: # best class only
|
109 |
+
conf, j = x[:, 5:].max(1, keepdim=True)
|
110 |
+
x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
|
111 |
+
|
112 |
+
# Filter by class
|
113 |
+
if classes is not None:
|
114 |
+
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
|
115 |
+
|
116 |
+
# Apply finite constraint
|
117 |
+
# if not torch.isfinite(x).all():
|
118 |
+
# x = x[torch.isfinite(x).all(1)]
|
119 |
+
|
120 |
+
# Check shape
|
121 |
+
n = x.shape[0] # number of boxes
|
122 |
+
if not n: # no boxes
|
123 |
+
continue
|
124 |
+
elif n > max_nms: # excess boxes
|
125 |
+
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
|
126 |
+
|
127 |
+
# Batched NMS
|
128 |
+
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
|
129 |
+
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
|
130 |
+
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
|
131 |
+
if i.shape[0] > max_det: # limit detections
|
132 |
+
i = i[:max_det]
|
133 |
+
if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
|
134 |
+
# update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
|
135 |
+
iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
|
136 |
+
weights = iou * scores[None] # box weights
|
137 |
+
x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
|
138 |
+
if redundant:
|
139 |
+
i = i[iou.sum(1) > 1] # require redundancy
|
140 |
+
|
141 |
+
output[xi] = x[i]
|
142 |
+
if (time.time() - t) > time_limit:
|
143 |
+
# LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded')
|
144 |
+
break # time limit exceeded
|
145 |
+
|
146 |
+
return output
|
147 |
+
|
148 |
+
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
|
149 |
+
# Resize and pad image while meeting stride-multiple constraints
|
150 |
+
shape = im.shape[:2] # current shape [height, width]
|
151 |
+
if isinstance(new_shape, int):
|
152 |
+
new_shape = (new_shape, new_shape)
|
153 |
+
|
154 |
+
# Scale ratio (new / old)
|
155 |
+
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
|
156 |
+
if not scaleup: # only scale down, do not scale up (for better val mAP)
|
157 |
+
r = min(r, 1.0)
|
158 |
+
|
159 |
+
# Compute padding
|
160 |
+
ratio = r, r # width, height ratios
|
161 |
+
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
|
162 |
+
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
|
163 |
+
if auto: # minimum rectangle
|
164 |
+
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
|
165 |
+
elif scaleFill: # stretch
|
166 |
+
dw, dh = 0.0, 0.0
|
167 |
+
new_unpad = (new_shape[1], new_shape[0])
|
168 |
+
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
|
169 |
+
|
170 |
+
dw /= 2 # divide padding into 2 sides
|
171 |
+
dh /= 2
|
172 |
+
|
173 |
+
if shape[::-1] != new_unpad: # resize
|
174 |
+
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
|
175 |
+
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
|
176 |
+
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
|
177 |
+
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
|
178 |
+
return im, ratio, (dw, dh)
|
179 |
+
|
180 |
+
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
|
181 |
+
# Rescale coords (xyxy) from img1_shape to img0_shape
|
182 |
+
if ratio_pad is None: # calculate from img0_shape
|
183 |
+
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
|
184 |
+
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
|
185 |
+
else:
|
186 |
+
gain = ratio_pad[0][0]
|
187 |
+
pad = ratio_pad[1]
|
188 |
+
|
189 |
+
coords[:, [0, 2]] -= pad[0] # x padding
|
190 |
+
coords[:, [1, 3]] -= pad[1] # y padding
|
191 |
+
coords[:, :4] /= gain
|
192 |
+
clip_coords(coords, img0_shape)
|
193 |
+
return coords
|
194 |
+
|
195 |
+
|
196 |
+
def clip_coords(boxes, shape):
|
197 |
+
# Clip bounding xyxy bounding boxes to image shape (height, width)
|
198 |
+
if isinstance(boxes, torch.Tensor): # faster individually
|
199 |
+
boxes[:, 0].clamp_(0, shape[1]) # x1
|
200 |
+
boxes[:, 1].clamp_(0, shape[0]) # y1
|
201 |
+
boxes[:, 2].clamp_(0, shape[1]) # x2
|
202 |
+
boxes[:, 3].clamp_(0, shape[0]) # y2
|
203 |
+
else: # np.array (faster grouped)
|
204 |
+
boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2
|
205 |
+
boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
|
206 |
+
|
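The helpers in this file form a small pipeline: letterbox() pads and resizes, the network runs on the resulting tensor, non_max_suppression() filters the raw predictions, and scale_coords() maps the surviving boxes back onto the original image. A hedged end-to-end sketch, where model stands for any network returning raw [x, y, w, h, obj, cls...] predictions and 'test.jpg' is a placeholder path:

    import cv2
    import numpy as np
    import torch

    img0 = cv2.imread('test.jpg')                               # original BGR image
    img = letterbox(img0, new_shape=(640, 640), auto=False)[0]
    img = img.transpose((2, 0, 1))[::-1]                        # HWC BGR -> CHW RGB
    img = np.ascontiguousarray(img, dtype=np.float32) / 255.0
    tensor = torch.from_numpy(img)[None]                        # add batch dimension

    pred = model(tensor)[0]                                     # placeholder forward pass
    dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)[0]
    if len(dets):
        dets[:, :4] = scale_coords(tensor.shape[2:], dets[:, :4], img0.shape).round()
    # dets: (n, 6) tensor [x1, y1, x2, y2, conf, cls] in original-image pixels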
asone/detectors/yolor/yolor_detector.py
ADDED
@@ -0,0 +1,138 @@
1 |
+
|
2 |
+
import os
|
3 |
+
from asone.utils import get_names
|
4 |
+
import numpy as np
|
5 |
+
import warnings
|
6 |
+
import torch
|
7 |
+
import onnxruntime
|
8 |
+
|
9 |
+
from .models.models import *
|
10 |
+
from asone import utils
|
11 |
+
from asone.detectors.yolor.utils.yolor_utils import (non_max_suppression,
|
12 |
+
scale_coords,
|
13 |
+
letterbox)
|
14 |
+
|
15 |
+
|
16 |
+
class YOLOrDetector:
|
17 |
+
def __init__(self,
|
18 |
+
weights=None,
|
19 |
+
cfg=None,
|
20 |
+
use_onnx=True,
|
21 |
+
use_cuda=True,
|
22 |
+
):
|
23 |
+
|
24 |
+
self.use_onnx = use_onnx
|
25 |
+
self.device = 'cuda' if use_cuda else 'cpu'
|
26 |
+
|
27 |
+
if not os.path.exists(weights):
|
28 |
+
utils.download_weights(weights)
|
29 |
+
|
30 |
+
if cfg is None:
|
31 |
+
cfg = os.path.join("cfg", "yolor_p6.cfg")
|
32 |
+
# If weights is a list of paths, select the path at the first index
|
33 |
+
weights = str(weights[0] if isinstance(weights, list) else weights)
|
34 |
+
# Load Model
|
35 |
+
self.model = self.load_model(use_cuda, weights, cfg=cfg, img_size=640)
|
36 |
+
|
37 |
+
def load_model(self, use_cuda, weights, cfg, img_size, fp16=False):
|
38 |
+
# Half precision is enabled only when fp16=True and the device is CUDA
self.fp16 = fp16 and self.device != 'cpu'
|
41 |
+
# Load onnx
|
42 |
+
if self.use_onnx:
|
43 |
+
if use_cuda:
|
44 |
+
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
|
45 |
+
else:
|
46 |
+
providers = ['CPUExecutionProvider']
|
47 |
+
model = onnxruntime.InferenceSession(weights, providers=providers)
|
48 |
+
# Load Pytorch
|
49 |
+
else:
|
50 |
+
model = Darknet(cfg, img_size).to(self.device)
|
51 |
+
model.load_state_dict(torch.load(
|
52 |
+
weights, map_location=self.device)['model'])
|
53 |
+
model.to(self.device).eval()
|
54 |
+
model.half() if self.fp16 else model.float()
|
55 |
+
return model
|
56 |
+
|
57 |
+
def image_preprocessing(self,
|
58 |
+
image: list,
|
59 |
+
input_shape=(640, 640)) -> list:
|
60 |
+
|
61 |
+
original_image = image.copy()
|
62 |
+
image = letterbox(image, input_shape, stride=32, auto=False)[0]
|
63 |
+
image = image.transpose((2, 0, 1))[::-1]
|
64 |
+
image = np.ascontiguousarray(image, dtype=np.float32)
|
65 |
+
image /= 255 # 0 - 255 to 0.0 - 1.0
|
66 |
+
if len(image.shape) == 3:
|
67 |
+
image = image[None] # expand for batch dim
|
68 |
+
return original_image, image
|
69 |
+
|
70 |
+
def detect(self, image: list,
|
71 |
+
input_shape: tuple = (640, 640),
|
72 |
+
conf_thres: float = 0.25,
|
73 |
+
iou_thres: float = 0.45,
|
74 |
+
max_det: int = 1000,
|
75 |
+
filter_classes: bool = None,
|
76 |
+
agnostic_nms: bool = True,
|
77 |
+
with_p6: bool = False) -> list:
|
78 |
+
|
79 |
+
# Image Preprocessing
|
80 |
+
original_image, processed_image = self.image_preprocessing(
|
81 |
+
image, input_shape)
|
82 |
+
|
83 |
+
# Inference
|
84 |
+
if self.use_onnx:
|
85 |
+
# Input names of ONNX model on which it is exported
|
86 |
+
input_name = self.model.get_inputs()[0].name
|
87 |
+
# Run onnx model
|
88 |
+
pred = self.model.run([self.model.get_outputs()[0].name], {
|
89 |
+
input_name: processed_image})[0]
|
90 |
+
# Run Pytorch model
|
91 |
+
else:
|
92 |
+
processed_image = torch.from_numpy(processed_image).to(self.device)
|
93 |
+
# Change image floating point precision if fp16 set to true
|
94 |
+
processed_image = processed_image.half() if self.fp16 else processed_image.float()
|
95 |
+
pred = self.model(processed_image, augment=False)[0]
|
96 |
+
pred = pred.detach().cpu().numpy()
|
97 |
+
|
98 |
+
if isinstance(pred, np.ndarray):
|
99 |
+
pred = torch.tensor(pred, device=self.device)
|
100 |
+
predictions = non_max_suppression(
|
101 |
+
pred, conf_thres,
|
102 |
+
iou_thres,
|
103 |
+
agnostic=agnostic_nms,
|
104 |
+
max_det=max_det)
|
105 |
+
|
106 |
+
for i, prediction in enumerate(predictions): # per image
|
107 |
+
if len(prediction):
|
108 |
+
prediction[:, :4] = scale_coords(
|
109 |
+
processed_image.shape[2:], prediction[:, :4], original_image.shape).round()
|
110 |
+
predictions[i] = prediction
|
111 |
+
|
112 |
+
predictions = predictions[0].cpu().numpy()
|
113 |
+
image_info = {
|
114 |
+
'width': original_image.shape[1],
|
115 |
+
'height': original_image.shape[0],
|
116 |
+
}
|
117 |
+
|
118 |
+
self.boxes = predictions[:, :4]
|
119 |
+
self.scores = predictions[:, 4:5]
|
120 |
+
self.class_ids = predictions[:, 5:6]
|
121 |
+
|
122 |
+
if filter_classes:
|
123 |
+
class_names = get_names()
|
124 |
+
|
125 |
+
filter_class_idx = []
|
126 |
+
if filter_classes:
|
127 |
+
for _class in filter_classes:
|
128 |
+
if _class.lower() in class_names:
|
129 |
+
filter_class_idx.append(
|
130 |
+
class_names.index(_class.lower()))
|
131 |
+
else:
|
132 |
+
warnings.warn(
|
133 |
+
f"class {_class} not found in model classes list.")
|
134 |
+
|
135 |
+
predictions = predictions[np.in1d(
predictions[:, 5].astype(int), filter_class_idx)]
|
137 |
+
|
138 |
+
return predictions, image_info
|
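A hedged usage sketch for the detector above; the weight filename is hypothetical, and the import assumes the yolor package __init__ re-exports YOLOrDetector the same way the yolov5 package below exports YOLOv5Detector:

    import cv2
    from asone.detectors.yolor import YOLOrDetector   # assumed re-export

    detector = YOLOrDetector(weights='yolor_csp_x.onnx',   # hypothetical ONNX export
                             use_onnx=True, use_cuda=False)
    frame = cv2.imread('sample.jpg')
    dets, info = detector.detect(frame, conf_thres=0.25, iou_thres=0.45)
    # dets: (n, 6) array [x1, y1, x2, y2, conf, cls]; info: {'width': ..., 'height': ...}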
asone/detectors/yolov5/__init__.py
ADDED
@@ -0,0 +1,2 @@
1 |
+
from .yolov5_detector import YOLOv5Detector
|
2 |
+
__all__ = ['YOLOv5Detector']
|
asone/detectors/yolov5/yolov5/__init__.py
ADDED
File without changes
|
asone/detectors/yolov5/yolov5/models/__init__.py
ADDED
@@ -0,0 +1,3 @@
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
sys.path.append(os.path.dirname(__file__))
|
asone/detectors/yolov5/yolov5/models/common.py
ADDED
@@ -0,0 +1,756 @@
1 |
+
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
|
2 |
+
"""
|
3 |
+
Common modules
|
4 |
+
"""
|
5 |
+
|
6 |
+
import json
|
7 |
+
import math
|
8 |
+
import platform
|
9 |
+
import warnings
|
10 |
+
from collections import OrderedDict, namedtuple
|
11 |
+
from copy import copy
|
12 |
+
from pathlib import Path
|
13 |
+
|
14 |
+
import cv2
|
15 |
+
import numpy as np
|
16 |
+
import pandas as pd
|
17 |
+
import requests
|
18 |
+
import torch
|
19 |
+
import torch.nn as nn
|
20 |
+
import yaml
|
21 |
+
from PIL import Image
|
22 |
+
from torch.cuda import amp
|
23 |
+
|
24 |
+
from asone.detectors.yolov5.yolov5.models.general import (LOGGER, check_requirements,
|
25 |
+
check_suffix, check_version,
|
26 |
+
colorstr, increment_path)
|
27 |
+
|
28 |
+
def autopad(k, p=None): # kernel, padding
|
29 |
+
# Pad to 'same'
|
30 |
+
if p is None:
|
31 |
+
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
|
32 |
+
return p
|
33 |
+
|
34 |
+
|
35 |
+
class Conv(nn.Module):
|
36 |
+
# Standard convolution
|
37 |
+
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
|
38 |
+
super().__init__()
|
39 |
+
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
|
40 |
+
self.bn = nn.BatchNorm2d(c2)
|
41 |
+
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
|
42 |
+
|
43 |
+
def forward(self, x):
|
44 |
+
return self.act(self.bn(self.conv(x)))
|
45 |
+
|
46 |
+
def forward_fuse(self, x):
|
47 |
+
return self.act(self.conv(x))
|
48 |
+
|
49 |
+
|
50 |
+
class DWConv(Conv):
|
51 |
+
# Depth-wise convolution class
|
52 |
+
def __init__(self, c1, c2, k=1, s=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
|
53 |
+
super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
|
54 |
+
|
55 |
+
|
56 |
+
class DWConvTranspose2d(nn.ConvTranspose2d):
|
57 |
+
# Depth-wise transpose convolution class
|
58 |
+
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, kernel, stride, padding, padding_out
|
59 |
+
super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
|
60 |
+
|
61 |
+
|
62 |
+
class TransformerLayer(nn.Module):
|
63 |
+
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
|
64 |
+
def __init__(self, c, num_heads):
|
65 |
+
super().__init__()
|
66 |
+
self.q = nn.Linear(c, c, bias=False)
|
67 |
+
self.k = nn.Linear(c, c, bias=False)
|
68 |
+
self.v = nn.Linear(c, c, bias=False)
|
69 |
+
self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
|
70 |
+
self.fc1 = nn.Linear(c, c, bias=False)
|
71 |
+
self.fc2 = nn.Linear(c, c, bias=False)
|
72 |
+
|
73 |
+
def forward(self, x):
|
74 |
+
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
|
75 |
+
x = self.fc2(self.fc1(x)) + x
|
76 |
+
return x
|
77 |
+
|
78 |
+
|
79 |
+
class TransformerBlock(nn.Module):
|
80 |
+
# Vision Transformer https://arxiv.org/abs/2010.11929
|
81 |
+
def __init__(self, c1, c2, num_heads, num_layers):
|
82 |
+
super().__init__()
|
83 |
+
self.conv = None
|
84 |
+
if c1 != c2:
|
85 |
+
self.conv = Conv(c1, c2)
|
86 |
+
self.linear = nn.Linear(c2, c2) # learnable position embedding
|
87 |
+
self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
|
88 |
+
self.c2 = c2
|
89 |
+
|
90 |
+
def forward(self, x):
|
91 |
+
if self.conv is not None:
|
92 |
+
x = self.conv(x)
|
93 |
+
b, _, w, h = x.shape
|
94 |
+
p = x.flatten(2).permute(2, 0, 1)
|
95 |
+
return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
|
96 |
+
|
97 |
+
|
98 |
+
class Bottleneck(nn.Module):
|
99 |
+
# Standard bottleneck
|
100 |
+
def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
|
101 |
+
super().__init__()
|
102 |
+
c_ = int(c2 * e) # hidden channels
|
103 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
104 |
+
self.cv2 = Conv(c_, c2, 3, 1, g=g)
|
105 |
+
self.add = shortcut and c1 == c2
|
106 |
+
|
107 |
+
def forward(self, x):
|
108 |
+
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
109 |
+
|
110 |
+
|
111 |
+
class BottleneckCSP(nn.Module):
|
112 |
+
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
|
113 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
114 |
+
super().__init__()
|
115 |
+
c_ = int(c2 * e) # hidden channels
|
116 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
117 |
+
self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
|
118 |
+
self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
|
119 |
+
self.cv4 = Conv(2 * c_, c2, 1, 1)
|
120 |
+
self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
|
121 |
+
self.act = nn.SiLU()
|
122 |
+
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
|
123 |
+
|
124 |
+
def forward(self, x):
|
125 |
+
y1 = self.cv3(self.m(self.cv1(x)))
|
126 |
+
y2 = self.cv2(x)
|
127 |
+
return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
|
128 |
+
|
129 |
+
|
130 |
+
class CrossConv(nn.Module):
|
131 |
+
# Cross Convolution Downsample
|
132 |
+
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
|
133 |
+
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
|
134 |
+
super().__init__()
|
135 |
+
c_ = int(c2 * e) # hidden channels
|
136 |
+
self.cv1 = Conv(c1, c_, (1, k), (1, s))
|
137 |
+
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
|
138 |
+
self.add = shortcut and c1 == c2
|
139 |
+
|
140 |
+
def forward(self, x):
|
141 |
+
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
142 |
+
|
143 |
+
|
144 |
+
class C3(nn.Module):
|
145 |
+
# CSP Bottleneck with 3 convolutions
|
146 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
147 |
+
super().__init__()
|
148 |
+
c_ = int(c2 * e) # hidden channels
|
149 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
150 |
+
self.cv2 = Conv(c1, c_, 1, 1)
|
151 |
+
self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
|
152 |
+
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
|
153 |
+
|
154 |
+
def forward(self, x):
|
155 |
+
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
|
156 |
+
|
157 |
+
|
158 |
+
class C3x(C3):
|
159 |
+
# C3 module with cross-convolutions
|
160 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
161 |
+
super().__init__(c1, c2, n, shortcut, g, e)
|
162 |
+
c_ = int(c2 * e)
|
163 |
+
self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
|
164 |
+
|
165 |
+
|
166 |
+
class C3TR(C3):
|
167 |
+
# C3 module with TransformerBlock()
|
168 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
169 |
+
super().__init__(c1, c2, n, shortcut, g, e)
|
170 |
+
c_ = int(c2 * e)
|
171 |
+
self.m = TransformerBlock(c_, c_, 4, n)
|
172 |
+
|
173 |
+
|
174 |
+
class C3SPP(C3):
|
175 |
+
# C3 module with SPP()
|
176 |
+
def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
|
177 |
+
super().__init__(c1, c2, n, shortcut, g, e)
|
178 |
+
c_ = int(c2 * e)
|
179 |
+
self.m = SPP(c_, c_, k)
|
180 |
+
|
181 |
+
|
182 |
+
class C3Ghost(C3):
|
183 |
+
# C3 module with GhostBottleneck()
|
184 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
185 |
+
super().__init__(c1, c2, n, shortcut, g, e)
|
186 |
+
c_ = int(c2 * e) # hidden channels
|
187 |
+
self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
|
188 |
+
|
189 |
+
|
190 |
+
class SPP(nn.Module):
|
191 |
+
# Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
|
192 |
+
def __init__(self, c1, c2, k=(5, 9, 13)):
|
193 |
+
super().__init__()
|
194 |
+
c_ = c1 // 2 # hidden channels
|
195 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
196 |
+
self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
|
197 |
+
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
|
198 |
+
|
199 |
+
def forward(self, x):
|
200 |
+
x = self.cv1(x)
|
201 |
+
with warnings.catch_warnings():
|
202 |
+
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
|
203 |
+
return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
|
204 |
+
|
205 |
+
|
206 |
+
class SPPF(nn.Module):
|
207 |
+
# Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
|
208 |
+
def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
|
209 |
+
super().__init__()
|
210 |
+
c_ = c1 // 2 # hidden channels
|
211 |
+
self.cv1 = Conv(c1, c_, 1, 1)
|
212 |
+
self.cv2 = Conv(c_ * 4, c2, 1, 1)
|
213 |
+
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
|
214 |
+
|
215 |
+
def forward(self, x):
|
216 |
+
x = self.cv1(x)
|
217 |
+
with warnings.catch_warnings():
|
218 |
+
warnings.simplefilter('ignore') # suppress torch 1.9.0 max_pool2d() warning
|
219 |
+
y1 = self.m(x)
|
220 |
+
y2 = self.m(y1)
|
221 |
+
return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
|
222 |
+
|
223 |
+
|
224 |
+
class Focus(nn.Module):
|
225 |
+
# Focus wh information into c-space
|
226 |
+
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
|
227 |
+
super().__init__()
|
228 |
+
self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
|
229 |
+
# self.contract = Contract(gain=2)
|
230 |
+
|
231 |
+
def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
|
232 |
+
return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
|
233 |
+
# return self.conv(self.contract(x))
|
234 |
+
|
235 |
+
|
236 |
+
class GhostConv(nn.Module):
|
237 |
+
# Ghost Convolution https://github.com/huawei-noah/ghostnet
|
238 |
+
def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
|
239 |
+
super().__init__()
|
240 |
+
c_ = c2 // 2 # hidden channels
|
241 |
+
self.cv1 = Conv(c1, c_, k, s, None, g, act)
|
242 |
+
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
|
243 |
+
|
244 |
+
def forward(self, x):
|
245 |
+
y = self.cv1(x)
|
246 |
+
return torch.cat((y, self.cv2(y)), 1)
|
247 |
+
|
248 |
+
|
249 |
+
class GhostBottleneck(nn.Module):
|
250 |
+
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
|
251 |
+
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
|
252 |
+
super().__init__()
|
253 |
+
c_ = c2 // 2
|
254 |
+
self.conv = nn.Sequential(
|
255 |
+
GhostConv(c1, c_, 1, 1), # pw
|
256 |
+
DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
|
257 |
+
GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
|
258 |
+
self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
|
259 |
+
act=False)) if s == 2 else nn.Identity()
|
260 |
+
|
261 |
+
def forward(self, x):
|
262 |
+
return self.conv(x) + self.shortcut(x)
|
263 |
+
|
264 |
+
|
265 |
+
class Contract(nn.Module):
|
266 |
+
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
|
267 |
+
def __init__(self, gain=2):
|
268 |
+
super().__init__()
|
269 |
+
self.gain = gain
|
270 |
+
|
271 |
+
def forward(self, x):
|
272 |
+
b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
|
273 |
+
s = self.gain
|
274 |
+
x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
|
275 |
+
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
|
276 |
+
return x.view(b, c * s * s, h // s, w // s) # x(1,256,40,40)
|
277 |
+
|
278 |
+
|
279 |
+
class Expand(nn.Module):
|
280 |
+
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
|
281 |
+
def __init__(self, gain=2):
|
282 |
+
super().__init__()
|
283 |
+
self.gain = gain
|
284 |
+
|
285 |
+
def forward(self, x):
|
286 |
+
b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
|
287 |
+
s = self.gain
|
288 |
+
x = x.view(b, s, s, c // s ** 2, h, w) # x(1,2,2,16,80,80)
|
289 |
+
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
|
290 |
+
return x.view(b, c // s ** 2, h * s, w * s) # x(1,16,160,160)
|
291 |
+
|
292 |
+
|
293 |
+
class Concat(nn.Module):
|
294 |
+
# Concatenate a list of tensors along dimension
|
295 |
+
def __init__(self, dimension=1):
|
296 |
+
super().__init__()
|
297 |
+
self.d = dimension
|
298 |
+
|
299 |
+
def forward(self, x):
|
300 |
+
return torch.cat(x, self.d)
|
301 |
+
|
302 |
+
|
303 |
+
class DetectMultiBackend(nn.Module):
|
304 |
+
# YOLOv5 MultiBackend class for python inference on various backends
|
305 |
+
def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
|
306 |
+
# Usage:
|
307 |
+
# PyTorch: weights = *.pt
|
308 |
+
# TorchScript: *.torchscript
|
309 |
+
# ONNX Runtime: *.onnx
|
310 |
+
# ONNX OpenCV DNN: *.onnx with --dnn
|
311 |
+
# OpenVINO: *.xml
|
312 |
+
# CoreML: *.mlmodel
|
313 |
+
# TensorRT: *.engine
|
314 |
+
# TensorFlow SavedModel: *_saved_model
|
315 |
+
# TensorFlow GraphDef: *.pb
|
316 |
+
# TensorFlow Lite: *.tflite
|
317 |
+
# TensorFlow Edge TPU: *_edgetpu.tflite
|
318 |
+
from asone.detectors.yolov5.utils.experimental import attempt_download, attempt_load # scoped to avoid circular import
|
319 |
+
|
320 |
+
super().__init__()
|
321 |
+
w = str(weights[0] if isinstance(weights, list) else weights)
|
322 |
+
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = self.model_type(w) # get backend
|
323 |
+
w = attempt_download(w) # download if not local
|
324 |
+
fp16 &= (pt or jit or onnx or engine) and device.type != 'cpu' # FP16
|
325 |
+
stride, names = 32, [f'class{i}' for i in range(1000)] # assign defaults
|
326 |
+
if data: # assign class names (optional)
|
327 |
+
with open(data, errors='ignore') as f:
|
328 |
+
names = yaml.safe_load(f)['names']
|
329 |
+
|
330 |
+
if pt: # PyTorch
|
331 |
+
model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
|
332 |
+
stride = max(int(model.stride.max()), 32) # model stride
|
333 |
+
names = model.module.names if hasattr(model, 'module') else model.names # get class names
|
334 |
+
model.half() if fp16 else model.float()
|
335 |
+
self.model = model # explicitly assign for to(), cpu(), cuda(), half()
|
336 |
+
elif jit: # TorchScript
|
337 |
+
LOGGER.info(f'Loading {w} for TorchScript inference...')
|
338 |
+
extra_files = {'config.txt': ''} # model metadata
|
339 |
+
model = torch.jit.load(w, _extra_files=extra_files)
|
340 |
+
model.half() if fp16 else model.float()
|
341 |
+
if extra_files['config.txt']:
|
342 |
+
d = json.loads(extra_files['config.txt']) # extra_files dict
|
343 |
+
stride, names = int(d['stride']), d['names']
|
344 |
+
elif dnn: # ONNX OpenCV DNN
|
345 |
+
LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
|
346 |
+
check_requirements(('opencv-python>=4.5.4',))
|
347 |
+
net = cv2.dnn.readNetFromONNX(w)
|
348 |
+
elif onnx: # ONNX Runtime
|
349 |
+
LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
|
350 |
+
cuda = torch.cuda.is_available()
|
351 |
+
check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
|
352 |
+
import onnxruntime
|
353 |
+
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
|
354 |
+
session = onnxruntime.InferenceSession(w, providers=providers)
|
355 |
+
meta = session.get_modelmeta().custom_metadata_map # metadata
|
356 |
+
if 'stride' in meta:
|
357 |
+
stride, names = int(meta['stride']), eval(meta['names'])
|
358 |
+
elif xml: # OpenVINO
|
359 |
+
LOGGER.info(f'Loading {w} for OpenVINO inference...')
|
360 |
+
check_requirements(('openvino',)) # requires openvino-dev: https://pypi.org/project/openvino-dev/
|
361 |
+
import openvino
|
362 |
+
from openvino.runtime import Core, Layout, get_batch
|
363 |
+
ie = Core()
|
364 |
+
if not Path(w).is_file(): # if not *.xml
|
365 |
+
w = next(Path(w).glob('*.xml')) # get *.xml file from *_openvino_model dir
|
366 |
+
network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
|
367 |
+
if network.get_parameters()[0].get_layout().empty:
|
368 |
+
network.get_parameters()[0].set_layout(Layout("NCHW"))
|
369 |
+
batch_dim = get_batch(network)
|
370 |
+
if batch_dim.is_static:
|
371 |
+
batch_size = batch_dim.get_length()
|
372 |
+
executable_network = ie.compile_model(network, device_name="CPU") # device_name="MYRIAD" for Intel NCS2
|
373 |
+
output_layer = next(iter(executable_network.outputs))
|
374 |
+
meta = Path(w).with_suffix('.yaml')
|
375 |
+
if meta.exists():
|
376 |
+
stride, names = self._load_metadata(meta) # load metadata
|
377 |
+
elif engine: # TensorRT
|
378 |
+
LOGGER.info(f'Loading {w} for TensorRT inference...')
|
379 |
+
import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download
|
380 |
+
check_version(trt.__version__, '7.0.0', hard=True) # require tensorrt>=7.0.0
|
381 |
+
Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
|
382 |
+
logger = trt.Logger(trt.Logger.INFO)
|
383 |
+
with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
|
384 |
+
model = runtime.deserialize_cuda_engine(f.read())
|
385 |
+
context = model.create_execution_context()
|
386 |
+
bindings = OrderedDict()
|
387 |
+
fp16 = False # default updated below
|
388 |
+
dynamic_input = False
|
389 |
+
for index in range(model.num_bindings):
|
390 |
+
name = model.get_binding_name(index)
|
391 |
+
dtype = trt.nptype(model.get_binding_dtype(index))
|
392 |
+
if model.binding_is_input(index):
|
393 |
+
if -1 in tuple(model.get_binding_shape(index)): # dynamic
|
394 |
+
dynamic_input = True
|
395 |
+
context.set_binding_shape(index, tuple(model.get_profile_shape(0, index)[2]))
|
396 |
+
if dtype == np.float16:
|
397 |
+
fp16 = True
|
398 |
+
shape = tuple(context.get_binding_shape(index))
|
399 |
+
data = torch.from_numpy(np.empty(shape, dtype=np.dtype(dtype))).to(device)
|
400 |
+
bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
|
401 |
+
binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
|
402 |
+
batch_size = bindings['images'].shape[0] # if dynamic, this is instead max batch size
|
403 |
+
elif coreml: # CoreML
|
404 |
+
LOGGER.info(f'Loading {w} for CoreML inference...')
|
405 |
+
import coremltools as ct
|
406 |
+
model = ct.models.MLModel(w)
|
407 |
+
else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
|
408 |
+
if saved_model: # SavedModel
|
409 |
+
LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
|
410 |
+
import tensorflow as tf
|
411 |
+
keras = False # assume TF1 saved_model
|
412 |
+
model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
|
413 |
+
elif pb: # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
|
414 |
+
LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
|
415 |
+
import tensorflow as tf
|
416 |
+
|
417 |
+
def wrap_frozen_graph(gd, inputs, outputs):
|
418 |
+
x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped
|
419 |
+
ge = x.graph.as_graph_element
|
420 |
+
return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
|
421 |
+
|
422 |
+
gd = tf.Graph().as_graph_def() # graph_def
|
423 |
+
with open(w, 'rb') as f:
|
424 |
+
gd.ParseFromString(f.read())
|
425 |
+
frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs="Identity:0")
|
426 |
+
elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
|
427 |
+
try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
|
428 |
+
from tflite_runtime.interpreter import Interpreter, load_delegate
|
429 |
+
except ImportError:
|
430 |
+
import tensorflow as tf
|
431 |
+
Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,
|
432 |
+
if edgetpu: # Edge TPU https://coral.ai/software/#edgetpu-runtime
|
433 |
+
LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
|
434 |
+
delegate = {
|
435 |
+
'Linux': 'libedgetpu.so.1',
|
436 |
+
'Darwin': 'libedgetpu.1.dylib',
|
437 |
+
'Windows': 'edgetpu.dll'}[platform.system()]
|
438 |
+
interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
|
439 |
+
else: # Lite
|
440 |
+
LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
|
441 |
+
interpreter = Interpreter(model_path=w) # load TFLite model
|
442 |
+
interpreter.allocate_tensors() # allocate
|
443 |
+
input_details = interpreter.get_input_details() # inputs
|
444 |
+
output_details = interpreter.get_output_details() # outputs
|
445 |
+
elif tfjs:
|
446 |
+
raise Exception('ERROR: YOLOv5 TF.js inference is not supported')
|
447 |
+
else:
|
448 |
+
raise Exception(f'ERROR: {w} is not a supported format')
|
449 |
+
self.__dict__.update(locals()) # assign all variables to self
|
450 |
+
|
451 |
+
def forward(self, im, augment=False, visualize=False, val=False):
|
452 |
+
# YOLOv5 MultiBackend inference
|
453 |
+
b, ch, h, w = im.shape # batch, channel, height, width
|
454 |
+
if self.fp16 and im.dtype != torch.float16:
|
455 |
+
im = im.half() # to FP16
|
456 |
+
|
457 |
+
if self.pt: # PyTorch
|
458 |
+
y = self.model(im, augment=augment, visualize=visualize)[0]
|
459 |
+
elif self.jit: # TorchScript
|
460 |
+
y = self.model(im)[0]
|
461 |
+
elif self.dnn: # ONNX OpenCV DNN
|
462 |
+
im = im.cpu().numpy() # torch to numpy
|
463 |
+
self.net.setInput(im)
|
464 |
+
y = self.net.forward()
|
465 |
+
elif self.onnx: # ONNX Runtime
|
466 |
+
im = im.cpu().numpy() # torch to numpy
|
467 |
+
y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
|
468 |
+
elif self.xml: # OpenVINO
|
469 |
+
im = im.cpu().numpy() # FP32
|
470 |
+
y = self.executable_network([im])[self.output_layer]
|
471 |
+
elif self.engine: # TensorRT
|
472 |
+
if im.shape != self.bindings['images'].shape and self.dynamic_input:
|
473 |
+
self.context.set_binding_shape(self.model.get_binding_index('images'), im.shape) # reshape if dynamic
|
474 |
+
self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
|
475 |
+
assert im.shape == self.bindings['images'].shape, (
|
476 |
+
f"image shape {im.shape} exceeds model max shape {self.bindings['images'].shape}" if self.dynamic_input
|
477 |
+
else f"image shape {im.shape} does not match model shape {self.bindings['images'].shape}")
|
478 |
+
self.binding_addrs['images'] = int(im.data_ptr())
|
479 |
+
self.context.execute_v2(list(self.binding_addrs.values()))
|
480 |
+
y = self.bindings['output'].data
|
481 |
+
elif self.coreml: # CoreML
|
482 |
+
im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3)
|
483 |
+
im = Image.fromarray((im[0] * 255).astype('uint8'))
|
484 |
+
# im = im.resize((192, 320), Image.ANTIALIAS)
|
485 |
+
y = self.model.predict({'image': im}) # coordinates are xywh normalized
|
486 |
+
if 'confidence' in y:
|
487 |
+
box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels
|
488 |
+
conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
|
489 |
+
y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
|
490 |
+
else:
|
491 |
+
k = 'var_' + str(sorted(int(k.replace('var_', '')) for k in y)[-1]) # output key
|
492 |
+
y = y[k] # output
|
493 |
+
else: # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
|
494 |
+
im = im.permute(0, 2, 3, 1).cpu().numpy() # torch BCHW to numpy BHWC shape(1,320,192,3)
|
495 |
+
if self.saved_model: # SavedModel
|
496 |
+
y = (self.model(im, training=False) if self.keras else self.model(im)).numpy()
|
497 |
+
elif self.pb: # GraphDef
|
498 |
+
y = self.frozen_func(x=self.tf.constant(im)).numpy()
|
499 |
+
else: # Lite or Edge TPU
|
500 |
+
input, output = self.input_details[0], self.output_details[0]
|
501 |
+
int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model
|
502 |
+
if int8:
|
503 |
+
scale, zero_point = input['quantization']
|
504 |
+
im = (im / scale + zero_point).astype(np.uint8) # de-scale
|
505 |
+
self.interpreter.set_tensor(input['index'], im)
|
506 |
+
self.interpreter.invoke()
|
507 |
+
y = self.interpreter.get_tensor(output['index'])
|
508 |
+
if int8:
|
509 |
+
scale, zero_point = output['quantization']
|
510 |
+
y = (y.astype(np.float32) - zero_point) * scale # re-scale
|
511 |
+
y[..., :4] *= [w, h, w, h] # xywh normalized to pixels
|
512 |
+
|
513 |
+
if isinstance(y, np.ndarray):
|
514 |
+
y = torch.tensor(y, device=self.device)
|
515 |
+
return (y, []) if val else y
|
516 |
+
|
517 |
+
def warmup(self, imgsz=(1, 3, 640, 640)):
|
518 |
+
# Warmup model by running inference once
|
519 |
+
warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb
|
520 |
+
if any(warmup_types) and self.device.type != 'cpu':
|
521 |
+
im = torch.zeros(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
|
522 |
+
for _ in range(2 if self.jit else 1): #
|
523 |
+
self.forward(im) # warmup
|
524 |
+
|
525 |
+
@staticmethod
|
526 |
+
def model_type(p='path/to/model.pt'):
|
527 |
+
# Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
|
528 |
+
from export import export_formats
|
529 |
+
suffixes = list(export_formats().Suffix) + ['.xml'] # export suffixes
|
530 |
+
check_suffix(p, suffixes) # checks
|
531 |
+
p = Path(p).name # eliminate trailing separators
|
532 |
+
pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, xml2 = (s in p for s in suffixes)
|
533 |
+
xml |= xml2 # *_openvino_model or *.xml
|
534 |
+
tflite &= not edgetpu # *.tflite
|
535 |
+
return pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs
|
536 |
+
|
537 |
+
@staticmethod
|
538 |
+
def _load_metadata(f='path/to/meta.yaml'):
|
539 |
+
# Load metadata from meta.yaml if it exists
|
540 |
+
with open(f, errors='ignore') as f:
|
541 |
+
d = yaml.safe_load(f)
|
542 |
+
return d['stride'], d['names'] # assign stride, names
|
543 |
+
|
544 |
+
|
545 |
+
class AutoShape(nn.Module):
|
546 |
+
# YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
|
547 |
+
conf = 0.25 # NMS confidence threshold
|
548 |
+
iou = 0.45 # NMS IoU threshold
|
549 |
+
agnostic = False # NMS class-agnostic
|
550 |
+
multi_label = False # NMS multiple labels per box
|
551 |
+
classes = None # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
|
552 |
+
max_det = 1000 # maximum number of detections per image
|
553 |
+
amp = False # Automatic Mixed Precision (AMP) inference
|
554 |
+
|
555 |
+
def __init__(self, model, verbose=True):
|
556 |
+
super().__init__()
|
557 |
+
if verbose:
|
558 |
+
LOGGER.info('Adding AutoShape... ')
|
559 |
+
copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=()) # copy attributes
|
560 |
+
self.dmb = isinstance(model, DetectMultiBackend) # DetectMultiBackend() instance
|
561 |
+
self.pt = not self.dmb or model.pt # PyTorch model
|
562 |
+
self.model = model.eval()
|
563 |
+
|
564 |
+
def _apply(self, fn):
|
565 |
+
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
|
566 |
+
self = super()._apply(fn)
|
567 |
+
if self.pt:
|
568 |
+
m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
|
569 |
+
m.stride = fn(m.stride)
|
570 |
+
m.grid = list(map(fn, m.grid))
|
571 |
+
if isinstance(m.anchor_grid, list):
|
572 |
+
m.anchor_grid = list(map(fn, m.anchor_grid))
|
573 |
+
return self
|
574 |
+
|
575 |
+
@torch.no_grad()
|
576 |
+
def forward(self, imgs, size=640, augment=False, profile=False):
|
577 |
+
# Inference from various sources. For height=640, width=1280, RGB images example inputs are:
|
578 |
+
# file: imgs = 'data/images/zidane.jpg' # str or PosixPath
|
579 |
+
# URI: = 'https://ultralytics.com/images/zidane.jpg'
|
580 |
+
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
|
581 |
+
# PIL: = Image.open('image.jpg') or ImageGrab.grab() # HWC x(640,1280,3)
|
582 |
+
# numpy: = np.zeros((640,1280,3)) # HWC
|
583 |
+
# torch: = torch.zeros(16,3,320,640) # BCHW (scaled to size=640, 0-1 values)
|
584 |
+
# multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images
|
585 |
+
|
586 |
+
t = [time_sync()]
|
587 |
+
p = next(self.model.parameters()) if self.pt else torch.zeros(1, device=self.model.device) # for device, type
|
588 |
+
autocast = self.amp and (p.device.type != 'cpu') # Automatic Mixed Precision (AMP) inference
|
589 |
+
if isinstance(imgs, torch.Tensor): # torch
|
590 |
+
with amp.autocast(autocast):
|
591 |
+
return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference
|
592 |
+
|
593 |
+
# Pre-process
|
594 |
+
n, imgs = (len(imgs), list(imgs)) if isinstance(imgs, (list, tuple)) else (1, [imgs]) # number, list of images
|
595 |
+
shape0, shape1, files = [], [], [] # image and inference shapes, filenames
|
596 |
+
for i, im in enumerate(imgs):
|
597 |
+
f = f'image{i}' # filename
|
598 |
+
if isinstance(im, (str, Path)): # filename or uri
|
599 |
+
im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
|
600 |
+
im = np.asarray(exif_transpose(im))
|
601 |
+
elif isinstance(im, Image.Image): # PIL Image
|
602 |
+
im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
|
603 |
+
files.append(Path(f).with_suffix('.jpg').name)
|
604 |
+
if im.shape[0] < 5: # image in CHW
|
605 |
+
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
|
606 |
+
im = im[..., :3] if im.ndim == 3 else np.tile(im[..., None], 3) # enforce 3ch input
|
607 |
+
s = im.shape[:2] # HWC
|
608 |
+
shape0.append(s) # image shape
|
609 |
+
g = (size / max(s)) # gain
|
610 |
+
shape1.append([y * g for y in s])
|
611 |
+
imgs[i] = im if im.data.contiguous else np.ascontiguousarray(im) # update
|
612 |
+
shape1 = [make_divisible(x, self.stride) if self.pt else size for x in np.array(shape1).max(0)] # inf shape
|
613 |
+
x = [letterbox(im, shape1, auto=False)[0] for im in imgs] # pad
|
614 |
+
x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2))) # stack and BHWC to BCHW
|
615 |
+
x = torch.from_numpy(x).to(p.device).type_as(p) / 255 # uint8 to fp16/32
|
616 |
+
t.append(time_sync())
|
617 |
+
|
618 |
+
with amp.autocast(autocast):
|
619 |
+
# Inference
|
620 |
+
y = self.model(x, augment, profile) # forward
|
621 |
+
t.append(time_sync())
|
622 |
+
|
623 |
+
# Post-process
|
624 |
+
y = non_max_suppression(y if self.dmb else y[0],
|
625 |
+
self.conf,
|
626 |
+
self.iou,
|
627 |
+
self.classes,
|
628 |
+
self.agnostic,
|
629 |
+
self.multi_label,
|
630 |
+
max_det=self.max_det) # NMS
|
631 |
+
for i in range(n):
|
632 |
+
scale_coords(shape1, y[i][:, :4], shape0[i])
|
633 |
+
|
634 |
+
t.append(time_sync())
|
635 |
+
return Detections(imgs, y, files, t, self.names, x.shape)
|
636 |
+
|
637 |
+
|
638 |
+
class Detections:
|
639 |
+
# YOLOv5 detections class for inference results
|
640 |
+
def __init__(self, imgs, pred, files, times=(0, 0, 0, 0), names=None, shape=None):
|
641 |
+
super().__init__()
|
642 |
+
d = pred[0].device # device
|
643 |
+
gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs] # normalizations
|
644 |
+
self.imgs = imgs # list of images as numpy arrays
|
645 |
+
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
|
646 |
+
self.names = names # class names
|
647 |
+
self.files = files # image filenames
|
648 |
+
self.times = times # profiling times
|
649 |
+
self.xyxy = pred # xyxy pixels
|
650 |
+
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
|
651 |
+
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
|
652 |
+
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
|
653 |
+
self.n = len(self.pred) # number of images (batch size)
|
654 |
+
self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3)) # timestamps (ms)
|
655 |
+
self.s = shape # inference BCHW shape
|
656 |
+
|
657 |
+
def display(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
|
658 |
+
crops = []
|
659 |
+
for i, (im, pred) in enumerate(zip(self.imgs, self.pred)):
|
660 |
+
s = f'image {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} ' # string
|
661 |
+
if pred.shape[0]:
|
662 |
+
for c in pred[:, -1].unique():
|
663 |
+
n = (pred[:, -1] == c).sum() # detections per class
|
664 |
+
s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
|
665 |
+
if show or save or render or crop:
|
666 |
+
annotator = Annotator(im, example=str(self.names))
|
667 |
+
for *box, conf, cls in reversed(pred): # xyxy, confidence, class
|
668 |
+
label = f'{self.names[int(cls)]} {conf:.2f}'
|
669 |
+
if crop:
|
670 |
+
file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
|
671 |
+
crops.append({
|
672 |
+
'box': box,
|
673 |
+
'conf': conf,
|
674 |
+
'cls': cls,
|
675 |
+
'label': label,
|
676 |
+
'im': save_one_box(box, im, file=file, save=save)})
|
677 |
+
else: # all others
|
678 |
+
annotator.box_label(box, label if labels else '', color=colors(cls))
|
679 |
+
im = annotator.im
|
680 |
+
else:
|
681 |
+
s += '(no detections)'
|
682 |
+
|
683 |
+
im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im # from np
|
684 |
+
if pprint:
|
685 |
+
print(s.rstrip(', '))
|
686 |
+
if show:
|
687 |
+
im.show(self.files[i]) # show
|
688 |
+
if save:
|
689 |
+
f = self.files[i]
|
690 |
+
im.save(save_dir / f) # save
|
691 |
+
if i == self.n - 1:
|
692 |
+
LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
|
693 |
+
if render:
|
694 |
+
self.imgs[i] = np.asarray(im)
|
695 |
+
if crop:
|
696 |
+
if save:
|
697 |
+
LOGGER.info(f'Saved results to {save_dir}\n')
|
698 |
+
return crops
|
699 |
+
|
700 |
+
def print(self):
|
701 |
+
self.display(pprint=True) # print results
|
702 |
+
print(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {tuple(self.s)}' % self.t)
|
703 |
+
|
704 |
+
def show(self, labels=True):
|
705 |
+
self.display(show=True, labels=labels) # show results
|
706 |
+
|
707 |
+
def save(self, labels=True, save_dir='runs/detect/exp'):
|
708 |
+
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) # increment save_dir
|
709 |
+
self.display(save=True, labels=labels, save_dir=save_dir) # save results
|
710 |
+
|
711 |
+
def crop(self, save=True, save_dir='runs/detect/exp'):
|
712 |
+
save_dir = increment_path(save_dir, exist_ok=save_dir != 'runs/detect/exp', mkdir=True) if save else None
|
713 |
+
return self.display(crop=True, save=save, save_dir=save_dir) # crop results
|
714 |
+
|
715 |
+
def render(self, labels=True):
|
716 |
+
self.display(render=True, labels=labels) # render results
|
717 |
+
return self.imgs
|
718 |
+
|
719 |
+
def pandas(self):
|
720 |
+
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
|
721 |
+
new = copy(self) # return copy
|
722 |
+
ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name' # xyxy columns
|
723 |
+
cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name' # xywh columns
|
724 |
+
for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
|
725 |
+
a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)] # update
|
726 |
+
setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
|
727 |
+
return new
|
728 |
+
|
729 |
+
def tolist(self):
|
730 |
+
# return a list of Detections objects, i.e. 'for result in results.tolist():'
|
731 |
+
r = range(self.n) # iterable
|
732 |
+
x = [Detections([self.imgs[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
|
733 |
+
# for d in x:
|
734 |
+
# for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
|
735 |
+
# setattr(d, k, getattr(d, k)[0]) # pop out of list
|
736 |
+
return x
|
737 |
+
|
738 |
+
def __len__(self):
|
739 |
+
return self.n # override len(results)
|
740 |
+
|
741 |
+
def __str__(self):
|
742 |
+
self.print() # override print(results)
|
743 |
+
return ''
|
744 |
+
|
745 |
+
|
746 |
+
class Classify(nn.Module):
|
747 |
+
# Classification head, i.e. x(b,c1,20,20) to x(b,c2)
|
748 |
+
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
|
749 |
+
super().__init__()
|
750 |
+
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
|
751 |
+
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
|
752 |
+
self.flat = nn.Flatten()
|
753 |
+
|
754 |
+
def forward(self, x):
|
755 |
+
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
|
756 |
+
return self.flat(self.conv(z)) # flatten to x(b,c2)
|
asone/detectors/yolov5/yolov5/models/experimental.py
ADDED
@@ -0,0 +1,56 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Experimental modules
"""
import math

import numpy as np
import torch
import torch.nn as nn
from asone.detectors.yolov5.yolov5.utils.yolov5_utils import yolov5_in_syspath

class Ensemble(nn.ModuleList):
    # Ensemble of models
    def __init__(self):
        super().__init__()

    def forward(self, x, augment=False, profile=False, visualize=False):
        y = [module(x, augment, profile, visualize)[0] for module in self]
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output


def attempt_load(weights, device=None, inplace=True, fuse=True):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    with yolov5_in_syspath():
        from asone.detectors.yolov5.yolov5.models.yolo import Detect, Model

    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        with yolov5_in_syspath():
            ckpt = torch.load(w, map_location='cpu')  # load
        ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model
        model.append(ckpt.fuse().eval() if fuse else ckpt.eval())  # fused or un-fused model in eval mode

    # Compatibility updates
    for m in model.modules():
        t = type(m)
        if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model):
            m.inplace = inplace  # torch 1.7.0 compatibility
            if t is Detect and not isinstance(m.anchor_grid, list):
                delattr(m, 'anchor_grid')
                setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
        elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
            m.recompute_scale_factor = None  # torch 1.11.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model
    print(f'Ensemble created with {weights}\n')
    for k in 'names', 'nc', 'yaml':
        setattr(model, k, getattr(model[0], k))
    model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride  # max stride
    assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
    return model  # return ensemble
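A minimal usage sketch for the attempt_load helper above (the checkpoint filename and CPU device are assumptions for illustration, not part of this commit):

    import torch
    from asone.detectors.yolov5.yolov5.models.experimental import attempt_load

    # a single path returns one model; a list such as ['yolov5s.pt', 'yolov5m.pt'] returns an Ensemble
    model = attempt_load('yolov5s.pt', device=torch.device('cpu'), inplace=True, fuse=True)  # hypothetical local checkpoint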
asone/detectors/yolov5/yolov5/models/general.py
ADDED
@@ -0,0 +1,1036 @@
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
General utils
"""

import contextlib
import glob
import inspect
import logging
import math
import os
import platform
import random
import re
import shutil
import signal
import threading
import time
import urllib
from datetime import datetime
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from subprocess import check_output
from typing import Optional
from zipfile import ZipFile

import cv2
import numpy as np
import pandas as pd
import pkg_resources as pkg
import torch
import torchvision
import yaml

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
RANK = int(os.getenv('RANK', -1))

# Settings
DATASETS_DIR = ROOT.parent / 'datasets'  # YOLOv5 datasets directory
NUM_THREADS = min(8, max(1, os.cpu_count() - 1))  # number of YOLOv5 multiprocessing threads
AUTOINSTALL = str(os.getenv('YOLOv5_AUTOINSTALL', True)).lower() == 'true'  # global auto-install mode
VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true'  # global verbose mode
FONT = 'Arial.ttf'  # https://ultralytics.com/assets/Arial.ttf

torch.set_printoptions(linewidth=320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
pd.options.display.max_columns = 10
cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
os.environ['NUMEXPR_MAX_THREADS'] = str(NUM_THREADS)  # NumExpr max threads
os.environ['OMP_NUM_THREADS'] = '1' if platform.system() == 'darwin' else str(NUM_THREADS)  # OpenMP (PyTorch and SciPy)


def is_kaggle():
|
56 |
+
# Is environment a Kaggle Notebook?
|
57 |
+
try:
|
58 |
+
assert os.environ.get('PWD') == '/kaggle/working'
|
59 |
+
assert os.environ.get('KAGGLE_URL_BASE') == 'https://www.kaggle.com'
|
60 |
+
return True
|
61 |
+
except AssertionError:
|
62 |
+
return False
|
63 |
+
|
64 |
+
|
65 |
+
def is_writeable(dir, test=False):
|
66 |
+
# Return True if directory has write permissions, test opening a file with write permissions if test=True
|
67 |
+
if not test:
|
68 |
+
return os.access(dir, os.R_OK) # possible issues on Windows
|
69 |
+
file = Path(dir) / 'tmp.txt'
|
70 |
+
try:
|
71 |
+
with open(file, 'w'): # open file with write permissions
|
72 |
+
pass
|
73 |
+
file.unlink() # remove file
|
74 |
+
return True
|
75 |
+
except OSError:
|
76 |
+
return False
|
77 |
+
|
78 |
+
|
79 |
+
def set_logging(name=None, verbose=VERBOSE):
|
80 |
+
# Sets level and returns logger
|
81 |
+
if is_kaggle():
|
82 |
+
for h in logging.root.handlers:
|
83 |
+
logging.root.removeHandler(h) # remove all handlers associated with the root logger object
|
84 |
+
rank = int(os.getenv('RANK', -1)) # rank in world for Multi-GPU trainings
|
85 |
+
level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR
|
86 |
+
log = logging.getLogger(name)
|
87 |
+
log.setLevel(level)
|
88 |
+
handler = logging.StreamHandler()
|
89 |
+
handler.setFormatter(logging.Formatter("%(message)s"))
|
90 |
+
handler.setLevel(level)
|
91 |
+
log.addHandler(handler)
|
92 |
+
|
93 |
+
|
94 |
+
set_logging() # run before defining LOGGER
|
95 |
+
LOGGER = logging.getLogger("yolov5") # define globally (used in train.py, val.py, detect.py, etc.)
|
96 |
+
|
97 |
+
|
98 |
+
def user_config_dir(dir='Ultralytics', env_var='YOLOV5_CONFIG_DIR'):
|
99 |
+
# Return path of user configuration directory. Prefer environment variable if exists. Make dir if required.
|
100 |
+
env = os.getenv(env_var)
|
101 |
+
if env:
|
102 |
+
path = Path(env) # use environment variable
|
103 |
+
else:
|
104 |
+
cfg = {'Windows': 'AppData/Roaming', 'Linux': '.config', 'Darwin': 'Library/Application Support'} # 3 OS dirs
|
105 |
+
path = Path.home() / cfg.get(platform.system(), '') # OS-specific config dir
|
106 |
+
path = (path if is_writeable(path) else Path('/tmp')) / dir # GCP and AWS lambda fix, only /tmp is writeable
|
107 |
+
path.mkdir(exist_ok=True) # make if required
|
108 |
+
return path
|
109 |
+
|
110 |
+
|
111 |
+
CONFIG_DIR = user_config_dir() # Ultralytics settings dir
|
112 |
+
|
113 |
+
|
114 |
+
class Profile(contextlib.ContextDecorator):
|
115 |
+
# Usage: @Profile() decorator or 'with Profile():' context manager
|
116 |
+
def __enter__(self):
|
117 |
+
self.start = time.time()
|
118 |
+
|
119 |
+
def __exit__(self, type, value, traceback):
|
120 |
+
print(f'Profile results: {time.time() - self.start:.5f}s')
|
121 |
+
|
122 |
+
|
123 |
+
class Timeout(contextlib.ContextDecorator):
|
124 |
+
# Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager
|
125 |
+
def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors=True):
|
126 |
+
self.seconds = int(seconds)
|
127 |
+
self.timeout_message = timeout_msg
|
128 |
+
self.suppress = bool(suppress_timeout_errors)
|
129 |
+
|
130 |
+
def _timeout_handler(self, signum, frame):
|
131 |
+
raise TimeoutError(self.timeout_message)
|
132 |
+
|
133 |
+
def __enter__(self):
|
134 |
+
if platform.system() != 'Windows': # not supported on Windows
|
135 |
+
signal.signal(signal.SIGALRM, self._timeout_handler) # Set handler for SIGALRM
|
136 |
+
signal.alarm(self.seconds) # start countdown for SIGALRM to be raised
|
137 |
+
|
138 |
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
139 |
+
if platform.system() != 'Windows':
|
140 |
+
signal.alarm(0) # Cancel SIGALRM if it's scheduled
|
141 |
+
if self.suppress and exc_type is TimeoutError: # Suppress TimeoutError
|
142 |
+
return True
|
143 |
+
|
144 |
+
|
145 |
+
class WorkingDirectory(contextlib.ContextDecorator):
|
146 |
+
# Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager
|
147 |
+
def __init__(self, new_dir):
|
148 |
+
self.dir = new_dir # new dir
|
149 |
+
self.cwd = Path.cwd().resolve() # current dir
|
150 |
+
|
151 |
+
def __enter__(self):
|
152 |
+
os.chdir(self.dir)
|
153 |
+
|
154 |
+
def __exit__(self, exc_type, exc_val, exc_tb):
|
155 |
+
os.chdir(self.cwd)
|
156 |
+
|
157 |
+
|
158 |
+
def try_except(func):
|
159 |
+
# try-except function. Usage: @try_except decorator
|
160 |
+
def handler(*args, **kwargs):
|
161 |
+
try:
|
162 |
+
func(*args, **kwargs)
|
163 |
+
except Exception as e:
|
164 |
+
print(e)
|
165 |
+
|
166 |
+
return handler
|
167 |
+
|
168 |
+
|
169 |
+
def threaded(func):
|
170 |
+
# Multi-threads a target function and returns thread. Usage: @threaded decorator
|
171 |
+
def wrapper(*args, **kwargs):
|
172 |
+
thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
|
173 |
+
thread.start()
|
174 |
+
return thread
|
175 |
+
|
176 |
+
return wrapper
|
177 |
+
|
178 |
+
|
179 |
+
def methods(instance):
|
180 |
+
# Get class/instance methods
|
181 |
+
return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith("__")]
|
182 |
+
|
183 |
+
|
184 |
+
def print_args(args: Optional[dict] = None, show_file=True, show_fcn=False):
|
185 |
+
# Print function arguments (optional args dict)
|
186 |
+
x = inspect.currentframe().f_back # previous frame
|
187 |
+
file, _, fcn, _, _ = inspect.getframeinfo(x)
|
188 |
+
if args is None: # get args automatically
|
189 |
+
args, _, _, frm = inspect.getargvalues(x)
|
190 |
+
args = {k: v for k, v in frm.items() if k in args}
|
191 |
+
s = (f'{Path(file).stem}: ' if show_file else '') + (f'{fcn}: ' if show_fcn else '')
|
192 |
+
LOGGER.info(colorstr(s) + ', '.join(f'{k}={v}' for k, v in args.items()))
|
193 |
+
|
194 |
+
|
195 |
+
def init_seeds(seed=0, deterministic=False):
|
196 |
+
# Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html
|
197 |
+
# cudnn seed 0 settings are slower and more reproducible, else faster and less reproducible
|
198 |
+
import torch.backends.cudnn as cudnn
|
199 |
+
|
200 |
+
if deterministic and check_version(torch.__version__, '1.12.0'): # https://github.com/ultralytics/yolov5/pull/8213
|
201 |
+
torch.use_deterministic_algorithms(True)
|
202 |
+
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
|
203 |
+
os.environ['PYTHONHASHSEED'] = str(seed)
|
204 |
+
|
205 |
+
random.seed(seed)
|
206 |
+
np.random.seed(seed)
|
207 |
+
torch.manual_seed(seed)
|
208 |
+
cudnn.benchmark, cudnn.deterministic = (False, True) if seed == 0 else (True, False)
|
209 |
+
torch.cuda.manual_seed(seed)
|
210 |
+
torch.cuda.manual_seed_all(seed) # for Multi-GPU, exception safe
|
211 |
+
|
212 |
+
|
213 |
+
def intersect_dicts(da, db, exclude=()):
|
214 |
+
# Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
|
215 |
+
return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
|
216 |
+
|
217 |
+
|
218 |
+
def get_latest_run(search_dir='.'):
|
219 |
+
# Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
|
220 |
+
last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
|
221 |
+
return max(last_list, key=os.path.getctime) if last_list else ''
|
222 |
+
|
223 |
+
|
224 |
+
def is_docker() -> bool:
|
225 |
+
"""Check if the process runs inside a docker container."""
|
226 |
+
if Path("/.dockerenv").exists():
|
227 |
+
return True
|
228 |
+
try: # check if docker is in control groups
|
229 |
+
with open("/proc/self/cgroup") as file:
|
230 |
+
return any("docker" in line for line in file)
|
231 |
+
except OSError:
|
232 |
+
return False
|
233 |
+
|
234 |
+
|
235 |
+
def is_colab():
|
236 |
+
# Is environment a Google Colab instance?
|
237 |
+
try:
|
238 |
+
import google.colab
|
239 |
+
return True
|
240 |
+
except ImportError:
|
241 |
+
return False
|
242 |
+
|
243 |
+
|
244 |
+
def is_pip():
|
245 |
+
# Is file in a pip package?
|
246 |
+
return 'site-packages' in Path(__file__).resolve().parts
|
247 |
+
|
248 |
+
|
249 |
+
def is_ascii(s=''):
|
250 |
+
# Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7)
|
251 |
+
s = str(s) # convert list, tuple, None, etc. to str
|
252 |
+
return len(s.encode().decode('ascii', 'ignore')) == len(s)
|
253 |
+
|
254 |
+
|
255 |
+
def is_chinese(s='人工智能'):
|
256 |
+
# Is string composed of any Chinese characters?
|
257 |
+
return bool(re.search('[\u4e00-\u9fff]', str(s)))
|
258 |
+
|
259 |
+
|
260 |
+
def emojis(str=''):
|
261 |
+
# Return platform-dependent emoji-safe version of string
|
262 |
+
return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
|
263 |
+
|
264 |
+
|
265 |
+
def file_age(path=__file__):
|
266 |
+
# Return days since last file update
|
267 |
+
dt = (datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime)) # delta
|
268 |
+
return dt.days # + dt.seconds / 86400 # fractional days
|
269 |
+
|
270 |
+
|
271 |
+
def file_date(path=__file__):
|
272 |
+
# Return human-readable file modification date, i.e. '2021-3-26'
|
273 |
+
t = datetime.fromtimestamp(Path(path).stat().st_mtime)
|
274 |
+
return f'{t.year}-{t.month}-{t.day}'
|
275 |
+
|
276 |
+
|
277 |
+
def file_size(path):
|
278 |
+
# Return file/dir size (MB)
|
279 |
+
mb = 1 << 20 # bytes to MiB (1024 ** 2)
|
280 |
+
path = Path(path)
|
281 |
+
if path.is_file():
|
282 |
+
return path.stat().st_size / mb
|
283 |
+
elif path.is_dir():
|
284 |
+
return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / mb
|
285 |
+
else:
|
286 |
+
return 0.0
|
287 |
+
|
288 |
+
|
289 |
+
def check_online():
|
290 |
+
# Check internet connectivity
|
291 |
+
import socket
|
292 |
+
try:
|
293 |
+
socket.create_connection(("1.1.1.1", 443), 5) # check host accessibility
|
294 |
+
return True
|
295 |
+
except OSError:
|
296 |
+
return False
|
297 |
+
|
298 |
+
|
299 |
+
def git_describe(path=ROOT): # path must be a directory
|
300 |
+
# Return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
|
301 |
+
try:
|
302 |
+
assert (Path(path) / '.git').is_dir()
|
303 |
+
return check_output(f'git -C {path} describe --tags --long --always', shell=True).decode()[:-1]
|
304 |
+
except Exception:
|
305 |
+
return ''
|
306 |
+
|
307 |
+
|
308 |
+
@try_except
|
309 |
+
@WorkingDirectory(ROOT)
|
310 |
+
def check_git_status(repo='ultralytics/yolov5'):
|
311 |
+
# YOLOv5 status check, recommend 'git pull' if code is out of date
|
312 |
+
url = f'https://github.com/{repo}'
|
313 |
+
msg = f', for updates see {url}'
|
314 |
+
s = colorstr('github: ') # string
|
315 |
+
assert Path('.git').exists(), s + 'skipping check (not a git repository)' + msg
|
316 |
+
assert check_online(), s + 'skipping check (offline)' + msg
|
317 |
+
|
318 |
+
splits = re.split(pattern=r'\s', string=check_output('git remote -v', shell=True).decode())
|
319 |
+
matches = [repo in s for s in splits]
|
320 |
+
if any(matches):
|
321 |
+
remote = splits[matches.index(True) - 1]
|
322 |
+
else:
|
323 |
+
remote = 'ultralytics'
|
324 |
+
check_output(f'git remote add {remote} {url}', shell=True)
|
325 |
+
check_output(f'git fetch {remote}', shell=True, timeout=5) # git fetch
|
326 |
+
branch = check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out
|
327 |
+
n = int(check_output(f'git rev-list {branch}..{remote}/master --count', shell=True)) # commits behind
|
328 |
+
if n > 0:
|
329 |
+
pull = 'git pull' if remote == 'origin' else f'git pull {remote} master'
|
330 |
+
s += f"⚠️ YOLOv5 is out of date by {n} commit{'s' * (n > 1)}. Use `{pull}` or `git clone {url}` to update."
|
331 |
+
else:
|
332 |
+
s += f'up to date with {url} ✅'
|
333 |
+
LOGGER.info(emojis(s)) # emoji-safe
|
334 |
+
|
335 |
+
|
336 |
+
def check_python(minimum='3.7.0'):
|
337 |
+
# Check current python version vs. required python version
|
338 |
+
check_version(platform.python_version(), minimum, name='Python ', hard=True)
|
339 |
+
|
340 |
+
|
341 |
+
def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=False, verbose=False):
|
342 |
+
# Check version vs. required version
|
343 |
+
current, minimum = (pkg.parse_version(x) for x in (current, minimum))
|
344 |
+
result = (current == minimum) if pinned else (current >= minimum) # bool
|
345 |
+
s = f'{name}{minimum} required by YOLOv5, but {name}{current} is currently installed' # string
|
346 |
+
if hard:
|
347 |
+
assert result, s # assert min requirements met
|
348 |
+
if verbose and not result:
|
349 |
+
LOGGER.warning(s)
|
350 |
+
return result
|
351 |
+
|
352 |
+
|
353 |
+
@try_except
|
354 |
+
def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), install=True, cmds=()):
|
355 |
+
# Check installed dependencies meet requirements (pass *.txt file or list of packages)
|
356 |
+
prefix = colorstr('red', 'bold', 'requirements:')
|
357 |
+
check_python() # check python version
|
358 |
+
if isinstance(requirements, (str, Path)): # requirements.txt file
|
359 |
+
file = Path(requirements)
|
360 |
+
assert file.exists(), f"{prefix} {file.resolve()} not found, check failed."
|
361 |
+
with file.open() as f:
|
362 |
+
requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude]
|
363 |
+
else: # list or tuple of packages
|
364 |
+
requirements = [x for x in requirements if x not in exclude]
|
365 |
+
|
366 |
+
n = 0 # number of packages updates
|
367 |
+
for i, r in enumerate(requirements):
|
368 |
+
try:
|
369 |
+
pkg.require(r)
|
370 |
+
except Exception: # DistributionNotFound or VersionConflict if requirements not met
|
371 |
+
s = f"{prefix} {r} not found and is required by YOLOv5"
|
372 |
+
if install and AUTOINSTALL: # check environment variable
|
373 |
+
LOGGER.info(f"{s}, attempting auto-update...")
|
374 |
+
try:
|
375 |
+
assert check_online(), f"'pip install {r}' skipped (offline)"
|
376 |
+
LOGGER.info(check_output(f'pip install "{r}" {cmds[i] if cmds else ""}', shell=True).decode())
|
377 |
+
n += 1
|
378 |
+
except Exception as e:
|
379 |
+
LOGGER.warning(f'{prefix} {e}')
|
380 |
+
else:
|
381 |
+
LOGGER.info(f'{s}. Please install and rerun your command.')
|
382 |
+
|
383 |
+
if n: # if packages updated
|
384 |
+
source = file.resolve() if 'file' in locals() else requirements
|
385 |
+
s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \
|
386 |
+
f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n"
|
387 |
+
LOGGER.info(emojis(s))
|
388 |
+
|
389 |
+
|
390 |
+
def check_img_size(imgsz, s=32, floor=0):
|
391 |
+
# Verify image size is a multiple of stride s in each dimension
|
392 |
+
if isinstance(imgsz, int): # integer i.e. img_size=640
|
393 |
+
new_size = max(make_divisible(imgsz, int(s)), floor)
|
394 |
+
else: # list i.e. img_size=[640, 480]
|
395 |
+
imgsz = list(imgsz) # convert to list if tuple
|
396 |
+
new_size = [max(make_divisible(x, int(s)), floor) for x in imgsz]
|
397 |
+
if new_size != imgsz:
|
398 |
+
LOGGER.warning(f'WARNING: --img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
|
399 |
+
return new_size
|
400 |
+
|
401 |
+
|
402 |
+
def check_imshow():
|
403 |
+
# Check if environment supports image displays
|
404 |
+
try:
|
405 |
+
assert not is_docker(), 'cv2.imshow() is disabled in Docker environments'
|
406 |
+
assert not is_colab(), 'cv2.imshow() is disabled in Google Colab environments'
|
407 |
+
cv2.imshow('test', np.zeros((1, 1, 3)))
|
408 |
+
cv2.waitKey(1)
|
409 |
+
cv2.destroyAllWindows()
|
410 |
+
cv2.waitKey(1)
|
411 |
+
return True
|
412 |
+
except Exception as e:
|
413 |
+
LOGGER.warning(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
|
414 |
+
return False
|
415 |
+
|
416 |
+
|
417 |
+
def check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):
|
418 |
+
# Check file(s) for acceptable suffix
|
419 |
+
if file and suffix:
|
420 |
+
if isinstance(suffix, str):
|
421 |
+
suffix = [suffix]
|
422 |
+
for f in file if isinstance(file, (list, tuple)) else [file]:
|
423 |
+
s = Path(f).suffix.lower() # file suffix
|
424 |
+
if len(s):
|
425 |
+
assert s in suffix, f"{msg}{f} acceptable suffix is {suffix}"
|
426 |
+
|
427 |
+
|
428 |
+
def check_yaml(file, suffix=('.yaml', '.yml')):
|
429 |
+
# Search/download YAML file (if necessary) and return path, checking suffix
|
430 |
+
return check_file(file, suffix)
|
431 |
+
|
432 |
+
|
433 |
+
def check_file(file, suffix=''):
|
434 |
+
# Search/download file (if necessary) and return path
|
435 |
+
check_suffix(file, suffix) # optional
|
436 |
+
file = str(file) # convert to str()
|
437 |
+
if Path(file).is_file() or not file: # exists
|
438 |
+
return file
|
439 |
+
elif file.startswith(('http:/', 'https:/')): # download
|
440 |
+
url = file # warning: Pathlib turns :// -> :/
|
441 |
+
file = Path(urllib.parse.unquote(file).split('?')[0]).name # '%2F' to '/', split https://url.com/file.txt?auth
|
442 |
+
if Path(file).is_file():
|
443 |
+
LOGGER.info(f'Found {url} locally at {file}') # file already exists
|
444 |
+
else:
|
445 |
+
LOGGER.info(f'Downloading {url} to {file}...')
|
446 |
+
torch.hub.download_url_to_file(url, file)
|
447 |
+
assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check
|
448 |
+
return file
|
449 |
+
else: # search
|
450 |
+
files = []
|
451 |
+
for d in 'data', 'models', 'utils': # search directories
|
452 |
+
files.extend(glob.glob(str(ROOT / d / '**' / file), recursive=True)) # find file
|
453 |
+
assert len(files), f'File not found: {file}' # assert file was found
|
454 |
+
assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique
|
455 |
+
return files[0] # return file
|
456 |
+
|
457 |
+
|
458 |
+
def check_font(font=FONT, progress=False):
|
459 |
+
# Download font to CONFIG_DIR if necessary
|
460 |
+
font = Path(font)
|
461 |
+
file = CONFIG_DIR / font.name
|
462 |
+
if not font.exists() and not file.exists():
|
463 |
+
url = "https://ultralytics.com/assets/" + font.name
|
464 |
+
LOGGER.info(f'Downloading {url} to {file}...')
|
465 |
+
torch.hub.download_url_to_file(url, str(file), progress=progress)
|
466 |
+
|
467 |
+
|
468 |
+
def check_dataset(data, autodownload=True):
|
469 |
+
# Download, check and/or unzip dataset if not found locally
|
470 |
+
|
471 |
+
# Download (optional)
|
472 |
+
extract_dir = ''
|
473 |
+
if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip
|
474 |
+
download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False, threads=1)
|
475 |
+
data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml'))
|
476 |
+
extract_dir, autodownload = data.parent, False
|
477 |
+
|
478 |
+
# Read yaml (optional)
|
479 |
+
if isinstance(data, (str, Path)):
|
480 |
+
with open(data, errors='ignore') as f:
|
481 |
+
data = yaml.safe_load(f) # dictionary
|
482 |
+
|
483 |
+
# Checks
|
484 |
+
for k in 'train', 'val', 'nc':
|
485 |
+
assert k in data, emojis(f"data.yaml '{k}:' field missing ❌")
|
486 |
+
if 'names' not in data:
|
487 |
+
LOGGER.warning(emojis("data.yaml 'names:' field missing ⚠️, assigning default names 'class0', 'class1', etc."))
|
488 |
+
data['names'] = [f'class{i}' for i in range(data['nc'])] # default names
|
489 |
+
|
490 |
+
# Resolve paths
|
491 |
+
path = Path(extract_dir or data.get('path') or '') # optional 'path' default to '.'
|
492 |
+
if not path.is_absolute():
|
493 |
+
path = (ROOT / path).resolve()
|
494 |
+
for k in 'train', 'val', 'test':
|
495 |
+
if data.get(k): # prepend path
|
496 |
+
data[k] = str(path / data[k]) if isinstance(data[k], str) else [str(path / x) for x in data[k]]
|
497 |
+
|
498 |
+
# Parse yaml
|
499 |
+
train, val, test, s = (data.get(x) for x in ('train', 'val', 'test', 'download'))
|
500 |
+
if val:
|
501 |
+
val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path
|
502 |
+
if not all(x.exists() for x in val):
|
503 |
+
LOGGER.info(emojis('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]))
|
504 |
+
if not s or not autodownload:
|
505 |
+
raise Exception(emojis('Dataset not found ❌'))
|
506 |
+
t = time.time()
|
507 |
+
root = path.parent if 'path' in data else '..' # unzip directory i.e. '../'
|
508 |
+
if s.startswith('http') and s.endswith('.zip'): # URL
|
509 |
+
f = Path(s).name # filename
|
510 |
+
LOGGER.info(f'Downloading {s} to {f}...')
|
511 |
+
torch.hub.download_url_to_file(s, f)
|
512 |
+
Path(root).mkdir(parents=True, exist_ok=True) # create root
|
513 |
+
ZipFile(f).extractall(path=root) # unzip
|
514 |
+
Path(f).unlink() # remove zip
|
515 |
+
r = None # success
|
516 |
+
elif s.startswith('bash '): # bash script
|
517 |
+
LOGGER.info(f'Running {s} ...')
|
518 |
+
r = os.system(s)
|
519 |
+
else: # python script
|
520 |
+
r = exec(s, {'yaml': data}) # return None
|
521 |
+
dt = f'({round(time.time() - t, 1)}s)'
|
522 |
+
s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌"
|
523 |
+
LOGGER.info(emojis(f"Dataset download {s}"))
|
524 |
+
check_font('Arial.ttf' if is_ascii(data['names']) else 'Arial.Unicode.ttf', progress=True) # download fonts
|
525 |
+
return data # dictionary
|
526 |
+
|
527 |
+
|
528 |
+
def check_amp(model):
|
529 |
+
# Check PyTorch Automatic Mixed Precision (AMP) functionality. Return True on correct operation
|
530 |
+
from asone.detectors.yolov5.utils.common import AutoShape, DetectMultiBackend
|
531 |
+
|
532 |
+
def amp_allclose(model, im):
|
533 |
+
# All close FP32 vs AMP results
|
534 |
+
m = AutoShape(model, verbose=False) # model
|
535 |
+
a = m(im).xywhn[0] # FP32 inference
|
536 |
+
m.amp = True
|
537 |
+
b = m(im).xywhn[0] # AMP inference
|
538 |
+
return a.shape == b.shape and torch.allclose(a, b, atol=0.1) # close to 10% absolute tolerance
|
539 |
+
|
540 |
+
prefix = colorstr('AMP: ')
|
541 |
+
device = next(model.parameters()).device # get model device
|
542 |
+
if device.type == 'cpu':
|
543 |
+
return False # AMP disabled on CPU
|
544 |
+
f = ROOT / 'data' / 'images' / 'bus.jpg' # image to check
|
545 |
+
im = f if f.exists() else 'https://ultralytics.com/images/bus.jpg' if check_online() else np.ones((640, 640, 3))
|
546 |
+
try:
|
547 |
+
assert amp_allclose(model, im) or amp_allclose(DetectMultiBackend('yolov5n.pt', device), im)
|
548 |
+
LOGGER.info(emojis(f'{prefix}checks passed ✅'))
|
549 |
+
return True
|
550 |
+
except Exception:
|
551 |
+
help_url = 'https://github.com/ultralytics/yolov5/issues/7908'
|
552 |
+
LOGGER.warning(emojis(f'{prefix}checks failed ❌, disabling Automatic Mixed Precision. See {help_url}'))
|
553 |
+
return False
|
554 |
+
|
555 |
+
|
556 |
+
def url2file(url):
|
557 |
+
# Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt
|
558 |
+
url = str(Path(url)).replace(':/', '://') # Pathlib turns :// -> :/
|
559 |
+
return Path(urllib.parse.unquote(url)).name.split('?')[0] # '%2F' to '/', split https://url.com/file.txt?auth
|
560 |
+
|
561 |
+
|
562 |
+
def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1, retry=3):
|
563 |
+
# Multi-threaded file download and unzip function, used in data.yaml for autodownload
|
564 |
+
def download_one(url, dir):
|
565 |
+
# Download 1 file
|
566 |
+
success = True
|
567 |
+
f = dir / Path(url).name # filename
|
568 |
+
if Path(url).is_file(): # exists in current path
|
569 |
+
Path(url).rename(f) # move to dir
|
570 |
+
elif not f.exists():
|
571 |
+
LOGGER.info(f'Downloading {url} to {f}...')
|
572 |
+
for i in range(retry + 1):
|
573 |
+
if curl:
|
574 |
+
s = 'sS' if threads > 1 else '' # silent
|
575 |
+
r = os.system(f'curl -{s}L "{url}" -o "{f}" --retry 9 -C -') # curl download with retry, continue
|
576 |
+
success = r == 0
|
577 |
+
else:
|
578 |
+
torch.hub.download_url_to_file(url, f, progress=threads == 1) # torch download
|
579 |
+
success = f.is_file()
|
580 |
+
if success:
|
581 |
+
break
|
582 |
+
elif i < retry:
|
583 |
+
LOGGER.warning(f'Download failure, retrying {i + 1}/{retry} {url}...')
|
584 |
+
else:
|
585 |
+
LOGGER.warning(f'Failed to download {url}...')
|
586 |
+
|
587 |
+
if unzip and success and f.suffix in ('.zip', '.gz'):
|
588 |
+
LOGGER.info(f'Unzipping {f}...')
|
589 |
+
if f.suffix == '.zip':
|
590 |
+
ZipFile(f).extractall(path=dir) # unzip
|
591 |
+
elif f.suffix == '.gz':
|
592 |
+
os.system(f'tar xfz {f} --directory {f.parent}') # unzip
|
593 |
+
if delete:
|
594 |
+
f.unlink() # remove zip
|
595 |
+
|
596 |
+
dir = Path(dir)
|
597 |
+
dir.mkdir(parents=True, exist_ok=True) # make directory
|
598 |
+
if threads > 1:
|
599 |
+
pool = ThreadPool(threads)
|
600 |
+
pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
|
601 |
+
pool.close()
|
602 |
+
pool.join()
|
603 |
+
else:
|
604 |
+
for u in [url] if isinstance(url, (str, Path)) else url:
|
605 |
+
download_one(u, dir)
|
606 |
+
|
607 |
+
|
608 |
+
def make_divisible(x, divisor):
|
609 |
+
# Returns nearest x divisible by divisor
|
610 |
+
if isinstance(divisor, torch.Tensor):
|
611 |
+
divisor = int(divisor.max()) # to int
|
612 |
+
return math.ceil(x / divisor) * divisor
|
613 |
+
|
614 |
+
|
615 |
+
def clean_str(s):
|
616 |
+
# Cleans a string by replacing special characters with underscore _
|
617 |
+
return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
|
618 |
+
|
619 |
+
|
620 |
+
def one_cycle(y1=0.0, y2=1.0, steps=100):
|
621 |
+
# lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf
|
622 |
+
return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1
|
623 |
+
|
624 |
+
|
625 |
+
def colorstr(*input):
|
626 |
+
# Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world')
|
627 |
+
*args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string
|
628 |
+
colors = {
|
629 |
+
'black': '\033[30m', # basic colors
|
630 |
+
'red': '\033[31m',
|
631 |
+
'green': '\033[32m',
|
632 |
+
'yellow': '\033[33m',
|
633 |
+
'blue': '\033[34m',
|
634 |
+
'magenta': '\033[35m',
|
635 |
+
'cyan': '\033[36m',
|
636 |
+
'white': '\033[37m',
|
637 |
+
'bright_black': '\033[90m', # bright colors
|
638 |
+
'bright_red': '\033[91m',
|
639 |
+
'bright_green': '\033[92m',
|
640 |
+
'bright_yellow': '\033[93m',
|
641 |
+
'bright_blue': '\033[94m',
|
642 |
+
'bright_magenta': '\033[95m',
|
643 |
+
'bright_cyan': '\033[96m',
|
644 |
+
'bright_white': '\033[97m',
|
645 |
+
'end': '\033[0m', # misc
|
646 |
+
'bold': '\033[1m',
|
647 |
+
'underline': '\033[4m'}
|
648 |
+
return ''.join(colors[x] for x in args) + f'{string}' + colors['end']
|
649 |
+
|
650 |
+
|
651 |
+
def labels_to_class_weights(labels, nc=80):
|
652 |
+
# Get class weights (inverse frequency) from training labels
|
653 |
+
if labels[0] is None: # no labels loaded
|
654 |
+
return torch.Tensor()
|
655 |
+
|
656 |
+
labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO
|
657 |
+
classes = labels[:, 0].astype(int) # labels = [class xywh]
|
658 |
+
weights = np.bincount(classes, minlength=nc) # occurrences per class
|
659 |
+
|
660 |
+
# Prepend gridpoint count (for uCE training)
|
661 |
+
# gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image
|
662 |
+
# weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start
|
663 |
+
|
664 |
+
weights[weights == 0] = 1 # replace empty bins with 1
|
665 |
+
weights = 1 / weights # number of targets per class
|
666 |
+
weights /= weights.sum() # normalize
|
667 |
+
return torch.from_numpy(weights).float()
|
668 |
+
|
669 |
+
|
670 |
+
def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
|
671 |
+
# Produces image weights based on class_weights and image contents
|
672 |
+
# Usage: index = random.choices(range(n), weights=image_weights, k=1) # weighted image sample
|
673 |
+
class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels])
|
674 |
+
return (class_weights.reshape(1, nc) * class_counts).sum(1)
|
675 |
+
|
676 |
+
|
677 |
+
def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
|
678 |
+
# https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
|
679 |
+
# a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
|
680 |
+
# b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
|
681 |
+
# x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
|
682 |
+
# x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet
|
683 |
+
return [
|
684 |
+
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
|
685 |
+
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
686 |
+
64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
|
687 |
+
|
688 |
+
|
689 |
+
def xyxy2xywh(x):
|
690 |
+
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
|
691 |
+
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
692 |
+
y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
|
693 |
+
y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
|
694 |
+
y[:, 2] = x[:, 2] - x[:, 0] # width
|
695 |
+
y[:, 3] = x[:, 3] - x[:, 1] # height
|
696 |
+
return y
|
697 |
+
|
698 |
+
|
699 |
+
def xywh2xyxy(x):
|
700 |
+
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
|
701 |
+
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
702 |
+
y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
|
703 |
+
y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
|
704 |
+
y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
|
705 |
+
y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
|
706 |
+
return y
|
707 |
+
|
708 |
+
|
709 |
+
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
|
710 |
+
# Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
|
711 |
+
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
712 |
+
y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x
|
713 |
+
y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y
|
714 |
+
y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x
|
715 |
+
y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y
|
716 |
+
return y
|
717 |
+
|
718 |
+
|
719 |
+
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
|
720 |
+
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
|
721 |
+
if clip:
|
722 |
+
clip_coords(x, (h - eps, w - eps)) # warning: inplace clip
|
723 |
+
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
724 |
+
y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w # x center
|
725 |
+
y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h # y center
|
726 |
+
y[:, 2] = (x[:, 2] - x[:, 0]) / w # width
|
727 |
+
y[:, 3] = (x[:, 3] - x[:, 1]) / h # height
|
728 |
+
return y
|
729 |
+
|
730 |
+
|
731 |
+
def xyn2xy(x, w=640, h=640, padw=0, padh=0):
|
732 |
+
# Convert normalized segments into pixel segments, shape (n,2)
|
733 |
+
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
734 |
+
y[:, 0] = w * x[:, 0] + padw # top left x
|
735 |
+
y[:, 1] = h * x[:, 1] + padh # top left y
|
736 |
+
return y
|
737 |
+
|
738 |
+
|
739 |
+
def segment2box(segment, width=640, height=640):
|
740 |
+
# Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)
|
741 |
+
x, y = segment.T # segment xy
|
742 |
+
inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
|
743 |
+
x, y, = x[inside], y[inside]
|
744 |
+
return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy
|
745 |
+
|
746 |
+
|
747 |
+
def segments2boxes(segments):
|
748 |
+
# Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
|
749 |
+
boxes = []
|
750 |
+
for s in segments:
|
751 |
+
x, y = s.T # segment xy
|
752 |
+
boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy
|
753 |
+
return xyxy2xywh(np.array(boxes)) # cls, xywh
|
754 |
+
|
755 |
+
|
756 |
+
def resample_segments(segments, n=1000):
|
757 |
+
# Up-sample an (n,2) segment
|
758 |
+
for i, s in enumerate(segments):
|
759 |
+
s = np.concatenate((s, s[0:1, :]), axis=0)
|
760 |
+
x = np.linspace(0, len(s) - 1, n)
|
761 |
+
xp = np.arange(len(s))
|
762 |
+
segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy
|
763 |
+
return segments
|
764 |
+
|
765 |
+
|
766 |
+
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
|
767 |
+
# Rescale coords (xyxy) from img1_shape to img0_shape
|
768 |
+
if ratio_pad is None: # calculate from img0_shape
|
769 |
+
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
|
770 |
+
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
|
771 |
+
else:
|
772 |
+
gain = ratio_pad[0][0]
|
773 |
+
pad = ratio_pad[1]
|
774 |
+
|
775 |
+
coords[:, [0, 2]] -= pad[0] # x padding
|
776 |
+
coords[:, [1, 3]] -= pad[1] # y padding
|
777 |
+
coords[:, :4] /= gain
|
778 |
+
clip_coords(coords, img0_shape)
|
779 |
+
return coords
|
780 |
+
|
781 |
+
|
782 |
+
def clip_coords(boxes, shape):
|
783 |
+
# Clip bounding xyxy bounding boxes to image shape (height, width)
|
784 |
+
if isinstance(boxes, torch.Tensor): # faster individually
|
785 |
+
boxes[:, 0].clamp_(0, shape[1]) # x1
|
786 |
+
boxes[:, 1].clamp_(0, shape[0]) # y1
|
787 |
+
boxes[:, 2].clamp_(0, shape[1]) # x2
|
788 |
+
boxes[:, 3].clamp_(0, shape[0]) # y2
|
789 |
+
else: # np.array (faster grouped)
|
790 |
+
boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2
|
791 |
+
boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
|
792 |
+
|
793 |
+
|
794 |
+
def non_max_suppression(prediction,
|
795 |
+
conf_thres=0.25,
|
796 |
+
iou_thres=0.45,
|
797 |
+
classes=None,
|
798 |
+
agnostic=False,
|
799 |
+
multi_label=False,
|
800 |
+
labels=(),
|
801 |
+
max_det=300):
|
802 |
+
"""Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes
|
803 |
+
|
804 |
+
Returns:
|
805 |
+
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
|
806 |
+
"""
|
807 |
+
|
808 |
+
bs = prediction.shape[0] # batch size
|
809 |
+
nc = prediction.shape[2] - 5 # number of classes
|
810 |
+
xc = prediction[..., 4] > conf_thres # candidates
|
811 |
+
|
812 |
+
# Checks
|
813 |
+
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
|
814 |
+
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
|
815 |
+
|
816 |
+
# Settings
|
817 |
+
# min_wh = 2 # (pixels) minimum box width and height
|
818 |
+
max_wh = 7680 # (pixels) maximum box width and height
|
819 |
+
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
|
820 |
+
time_limit = 0.3 + 0.03 * bs # seconds to quit after
|
821 |
+
redundant = True # require redundant detections
|
822 |
+
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
|
823 |
+
merge = False # use merge-NMS
|
824 |
+
|
825 |
+
t = time.time()
|
826 |
+
output = [torch.zeros((0, 6), device=prediction.device)] * bs
|
827 |
+
for xi, x in enumerate(prediction): # image index, image inference
|
828 |
+
# Apply constraints
|
829 |
+
# x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
|
830 |
+
x = x[xc[xi]] # confidence
|
831 |
+
|
832 |
+
# Cat apriori labels if autolabelling
|
833 |
+
if labels and len(labels[xi]):
|
834 |
+
lb = labels[xi]
|
835 |
+
v = torch.zeros((len(lb), nc + 5), device=x.device)
|
836 |
+
v[:, :4] = lb[:, 1:5] # box
|
837 |
+
v[:, 4] = 1.0 # conf
|
838 |
+
v[range(len(lb)), lb[:, 0].long() + 5] = 1.0 # cls
|
839 |
+
x = torch.cat((x, v), 0)
|
840 |
+
|
841 |
+
# If none remain process next image
|
842 |
+
if not x.shape[0]:
|
843 |
+
continue
|
844 |
+
|
845 |
+
# Compute conf
|
846 |
+
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
|
847 |
+
|
848 |
+
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
|
849 |
+
box = xywh2xyxy(x[:, :4])
|
850 |
+
|
851 |
+
# Detections matrix nx6 (xyxy, conf, cls)
|
852 |
+
if multi_label:
|
853 |
+
i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
|
854 |
+
x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
|
855 |
+
else: # best class only
|
856 |
+
conf, j = x[:, 5:].max(1, keepdim=True)
|
857 |
+
x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
|
858 |
+
|
859 |
+
# Filter by class
|
860 |
+
if classes is not None:
|
861 |
+
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
|
862 |
+
|
863 |
+
# Apply finite constraint
|
864 |
+
# if not torch.isfinite(x).all():
|
865 |
+
# x = x[torch.isfinite(x).all(1)]
|
866 |
+
|
867 |
+
# Check shape
|
868 |
+
n = x.shape[0] # number of boxes
|
869 |
+
if not n: # no boxes
|
870 |
+
continue
|
871 |
+
elif n > max_nms: # excess boxes
|
872 |
+
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
|
873 |
+
|
874 |
+
# Batched NMS
|
875 |
+
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
|
876 |
+
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
|
877 |
+
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
|
878 |
+
if i.shape[0] > max_det: # limit detections
|
879 |
+
i = i[:max_det]
|
880 |
+
if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
|
881 |
+
# update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
|
882 |
+
iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
|
883 |
+
weights = iou * scores[None] # box weights
|
884 |
+
x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
|
885 |
+
if redundant:
|
886 |
+
i = i[iou.sum(1) > 1] # require redundancy
|
887 |
+
|
888 |
+
output[xi] = x[i]
|
889 |
+
if (time.time() - t) > time_limit:
|
890 |
+
LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded')
|
891 |
+
break # time limit exceeded
|
892 |
+
|
893 |
+
return output
|
894 |
+
|
895 |
+
|
896 |
+
def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer()
|
897 |
+
# Strip optimizer from 'f' to finalize training, optionally save as 's'
|
898 |
+
x = torch.load(f, map_location=torch.device('cpu'))
|
899 |
+
if x.get('ema'):
|
900 |
+
x['model'] = x['ema'] # replace model with ema
|
901 |
+
for k in 'optimizer', 'best_fitness', 'wandb_id', 'ema', 'updates': # keys
|
902 |
+
x[k] = None
|
903 |
+
x['epoch'] = -1
|
904 |
+
x['model'].half() # to FP16
|
905 |
+
for p in x['model'].parameters():
|
906 |
+
p.requires_grad = False
|
907 |
+
torch.save(x, s or f)
|
908 |
+
mb = os.path.getsize(s or f) / 1E6 # filesize
|
909 |
+
LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB")
|
910 |
+
|
911 |
+
|
912 |
+
def print_mutation(results, hyp, save_dir, bucket, prefix=colorstr('evolve: ')):
|
913 |
+
evolve_csv = save_dir / 'evolve.csv'
|
914 |
+
evolve_yaml = save_dir / 'hyp_evolve.yaml'
|
915 |
+
keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss',
|
916 |
+
'val/obj_loss', 'val/cls_loss') + tuple(hyp.keys()) # [results + hyps]
|
917 |
+
keys = tuple(x.strip() for x in keys)
|
918 |
+
vals = results + tuple(hyp.values())
|
919 |
+
n = len(keys)
|
920 |
+
|
921 |
+
# Download (optional)
|
922 |
+
if bucket:
|
923 |
+
url = f'gs://{bucket}/evolve.csv'
|
924 |
+
if gsutil_getsize(url) > (evolve_csv.stat().st_size if evolve_csv.exists() else 0):
|
925 |
+
os.system(f'gsutil cp {url} {save_dir}') # download evolve.csv if larger than local
|
926 |
+
|
927 |
+
# Log to evolve.csv
|
928 |
+
s = '' if evolve_csv.exists() else (('%20s,' * n % keys).rstrip(',') + '\n') # add header
|
929 |
+
with open(evolve_csv, 'a') as f:
|
930 |
+
f.write(s + ('%20.5g,' * n % vals).rstrip(',') + '\n')
|
931 |
+
|
932 |
+
# Save yaml
|
933 |
+
with open(evolve_yaml, 'w') as f:
|
934 |
+
data = pd.read_csv(evolve_csv)
|
935 |
+
data = data.rename(columns=lambda x: x.strip()) # strip keys
|
936 |
+
i = np.argmax(fitness(data.values[:, :4])) #
|
937 |
+
generations = len(data)
|
938 |
+
f.write('# YOLOv5 Hyperparameter Evolution Results\n' + f'# Best generation: {i}\n' +
|
939 |
+
f'# Last generation: {generations - 1}\n' + '# ' + ', '.join(f'{x.strip():>20s}' for x in keys[:7]) +
|
940 |
+
'\n' + '# ' + ', '.join(f'{x:>20.5g}' for x in data.values[i, :7]) + '\n\n')
|
941 |
+
yaml.safe_dump(data.loc[i][7:].to_dict(), f, sort_keys=False)
|
942 |
+
|
943 |
+
# Print to screen
|
944 |
+
LOGGER.info(prefix + f'{generations} generations finished, current result:\n' + prefix +
|
945 |
+
', '.join(f'{x.strip():>20s}' for x in keys) + '\n' + prefix + ', '.join(f'{x:20.5g}'
|
946 |
+
for x in vals) + '\n\n')
|
947 |
+
|
948 |
+
if bucket:
|
949 |
+
os.system(f'gsutil cp {evolve_csv} {evolve_yaml} gs://{bucket}') # upload
|
950 |
+
|
951 |
+
|
952 |
+
def apply_classifier(x, model, img, im0):
|
953 |
+
# Apply a second stage classifier to YOLO outputs
|
954 |
+
# Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval()
|
955 |
+
im0 = [im0] if isinstance(im0, np.ndarray) else im0
|
956 |
+
for i, d in enumerate(x): # per image
|
957 |
+
if d is not None and len(d):
|
958 |
+
d = d.clone()
|
959 |
+
|
960 |
+
# Reshape and pad cutouts
|
961 |
+
b = xyxy2xywh(d[:, :4]) # boxes
|
962 |
+
b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square
|
963 |
+
b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad
|
964 |
+
d[:, :4] = xywh2xyxy(b).long()
|
965 |
+
|
966 |
+
# Rescale boxes from img_size to im0 size
|
967 |
+
scale_coords(img.shape[2:], d[:, :4], im0[i].shape)
|
968 |
+
|
969 |
+
# Classes
|
970 |
+
pred_cls1 = d[:, 5].long()
|
971 |
+
ims = []
|
972 |
+
for a in d:
|
973 |
+
cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
|
974 |
+
im = cv2.resize(cutout, (224, 224)) # BGR
|
975 |
+
|
976 |
+
im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
|
977 |
+
im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32
|
978 |
+
im /= 255 # 0 - 255 to 0.0 - 1.0
|
979 |
+
ims.append(im)
|
980 |
+
|
981 |
+
pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction
|
982 |
+
x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections
|
983 |
+
|
984 |
+
return x
|
985 |
+
|
986 |
+
|
987 |
+
def increment_path(path, exist_ok=False, sep='', mkdir=False):
|
988 |
+
# Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
|
989 |
+
path = Path(path) # os-agnostic
|
990 |
+
if path.exists() and not exist_ok:
|
991 |
+
path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '')
|
992 |
+
|
993 |
+
# Method 1
|
994 |
+
for n in range(2, 9999):
|
995 |
+
p = f'{path}{sep}{n}{suffix}' # increment path
|
996 |
+
if not os.path.exists(p): #
|
997 |
+
break
|
998 |
+
path = Path(p)
|
999 |
+
|
1000 |
+
# Method 2 (deprecated)
|
1001 |
+
# dirs = glob.glob(f"{path}{sep}*") # similar paths
|
1002 |
+
# matches = [re.search(rf"{path.stem}{sep}(\d+)", d) for d in dirs]
|
1003 |
+
# i = [int(m.groups()[0]) for m in matches if m] # indices
|
1004 |
+
# n = max(i) + 1 if i else 2 # increment number
|
1005 |
+
# path = Path(f"{path}{sep}{n}{suffix}") # increment path
|
1006 |
+
|
1007 |
+
if mkdir:
|
1008 |
+
path.mkdir(parents=True, exist_ok=True) # make directory
|
1009 |
+
|
1010 |
+
return path
|
1011 |
+
|
1012 |
+
|
1013 |
+
# OpenCV Chinese-friendly functions ------------------------------------------------------------------------------------
|
1014 |
+
imshow_ = cv2.imshow # copy to avoid recursion errors
|
1015 |
+
|
1016 |
+
|
1017 |
+
def imread(path, flags=cv2.IMREAD_COLOR):
|
1018 |
+
return cv2.imdecode(np.fromfile(path, np.uint8), flags)
|
1019 |
+
|
1020 |
+
|
1021 |
+
def imwrite(path, im):
|
1022 |
+
try:
|
1023 |
+
cv2.imencode(Path(path).suffix, im)[1].tofile(path)
|
1024 |
+
return True
|
1025 |
+
except Exception:
|
1026 |
+
return False
|
1027 |
+
|
1028 |
+
|
1029 |
+
def imshow(path, im):
|
1030 |
+
imshow_(path.encode('unicode_escape').decode(), im)
|
1031 |
+
|
1032 |
+
|
1033 |
+
cv2.imread, cv2.imwrite, cv2.imshow = imread, imwrite, imshow # redefine
|
1034 |
+
|
1035 |
+
# Variables ------------------------------------------------------------------------------------------------------------
|
1036 |
+
NCOLS = 0 if is_docker() else shutil.get_terminal_size().columns # terminal window size for tqdm
|
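Note: the non_max_suppression and box-conversion helpers added above are standard YOLOv5 post-processing utilities. A minimal, hedged sketch of calling them is below; the dummy tensor shape and the thresholds are illustrative assumptions, not values taken from this commit, and running it assumes the module's own dependencies are installed.

import torch
from asone.detectors.yolov5.yolov5.models.general import non_max_suppression, xyxy2xywh

# Dummy YOLOv5-style raw output: (batch, boxes, 5 + num_classes) as xywh + objectness + class scores (assumed shape)
pred = torch.rand(1, 100, 85)
dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45, max_det=300)
for det in dets:                      # one (n, 6) tensor per image: x1, y1, x2, y2, conf, cls
    if len(det):
        print(xyxy2xywh(det[:, :4]))  # kept boxes converted back to xywh
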
asone/detectors/yolov5/yolov5/models/yolo.py
ADDED
@@ -0,0 +1,345 @@
1 |
+
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
|
2 |
+
"""
|
3 |
+
YOLO-specific modules
|
4 |
+
|
5 |
+
Usage:
|
6 |
+
$ python path/to/models/yolo.py --cfg yolov5s.yaml
|
7 |
+
"""
|
8 |
+
|
9 |
+
import argparse
|
10 |
+
import contextlib
|
11 |
+
import os
|
12 |
+
import platform
|
13 |
+
import sys
|
14 |
+
from copy import deepcopy
|
15 |
+
from pathlib import Path
|
16 |
+
|
17 |
+
FILE = Path(__file__).resolve()
|
18 |
+
ROOT = FILE.parents[1] # YOLOv5 root directory
|
19 |
+
# if str(ROOT) not in sys.path:
|
20 |
+
# sys.path.append(str(ROOT)) # add ROOT to PATH
|
21 |
+
if platform.system() != 'Windows':
|
22 |
+
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
|
23 |
+
|
24 |
+
from asone.detectors.yolov5.yolov5.models.common import *
|
25 |
+
from asone.detectors.yolov5.yolov5.models.experimental import *
|
26 |
+
from asone.detectors.yolov5.yolov5.models.general import (LOGGER, check_version,
|
27 |
+
check_yaml, make_divisible,
|
28 |
+
print_args)
|
29 |
+
from asone.detectors.yolov5.yolov5.utils.torch_utils import (
|
30 |
+
fuse_conv_and_bn,
|
31 |
+
initialize_weights,
|
32 |
+
model_info,
|
33 |
+
profile,
|
34 |
+
scale_img,
|
35 |
+
select_device,
|
36 |
+
time_sync)
|
37 |
+
|
38 |
+
try:
|
39 |
+
import thop # for FLOPs computation
|
40 |
+
except ImportError:
|
41 |
+
thop = None
|
42 |
+
|
43 |
+
|
44 |
+
class Detect(nn.Module):
|
45 |
+
stride = None # strides computed during build
|
46 |
+
onnx_dynamic = False # ONNX export parameter
|
47 |
+
export = False # export mode
|
48 |
+
|
49 |
+
def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
|
50 |
+
super().__init__()
|
51 |
+
self.nc = nc # number of classes
|
52 |
+
self.no = nc + 5 # number of outputs per anchor
|
53 |
+
self.nl = len(anchors) # number of detection layers
|
54 |
+
self.na = len(anchors[0]) // 2 # number of anchors
|
55 |
+
self.grid = [torch.zeros(1)] * self.nl # init grid
|
56 |
+
self.anchor_grid = [torch.zeros(1)] * self.nl # init anchor grid
|
57 |
+
self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2)
|
58 |
+
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
|
59 |
+
self.inplace = inplace # use inplace ops (e.g. slice assignment)
|
60 |
+
|
61 |
+
def forward(self, x):
|
62 |
+
z = [] # inference output
|
63 |
+
for i in range(self.nl):
|
64 |
+
x[i] = self.m[i](x[i]) # conv
|
65 |
+
bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
|
66 |
+
x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
|
67 |
+
|
68 |
+
if not self.training: # inference
|
69 |
+
if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
|
70 |
+
self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)
|
71 |
+
|
72 |
+
y = x[i].sigmoid()
|
73 |
+
if self.inplace:
|
74 |
+
y[..., 0:2] = (y[..., 0:2] * 2 + self.grid[i]) * self.stride[i] # xy
|
75 |
+
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
|
76 |
+
else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
|
77 |
+
xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0
|
78 |
+
xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy
|
79 |
+
wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh
|
80 |
+
y = torch.cat((xy, wh, conf), 4)
|
81 |
+
z.append(y.view(bs, -1, self.no))
|
82 |
+
|
83 |
+
return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
|
84 |
+
|
85 |
+
def _make_grid(self, nx=20, ny=20, i=0):
|
86 |
+
d = self.anchors[i].device
|
87 |
+
t = self.anchors[i].dtype
|
88 |
+
shape = 1, self.na, ny, nx, 2 # grid shape
|
89 |
+
y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
|
90 |
+
if check_version(torch.__version__, '1.10.0'): # torch>=1.10.0 meshgrid workaround for torch>=0.7 compatibility
|
91 |
+
yv, xv = torch.meshgrid(y, x, indexing='ij')
|
92 |
+
else:
|
93 |
+
yv, xv = torch.meshgrid(y, x)
|
94 |
+
grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
|
95 |
+
anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
|
96 |
+
return grid, anchor_grid
|
97 |
+
|
98 |
+
|
99 |
+
class DetectionModel(nn.Module):
|
100 |
+
# YOLOv5 model
|
101 |
+
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
|
102 |
+
super().__init__()
|
103 |
+
if isinstance(cfg, dict):
|
104 |
+
self.yaml = cfg # model dict
|
105 |
+
else: # is *.yaml
|
106 |
+
import yaml # for torch hub
|
107 |
+
self.yaml_file = Path(cfg).name
|
108 |
+
with open(cfg, encoding='ascii', errors='ignore') as f:
|
109 |
+
self.yaml = yaml.safe_load(f) # model dict
|
110 |
+
|
111 |
+
# Define model
|
112 |
+
ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
|
113 |
+
if nc and nc != self.yaml['nc']:
|
114 |
+
LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
|
115 |
+
self.yaml['nc'] = nc # override yaml value
|
116 |
+
if anchors:
|
117 |
+
LOGGER.info(f'Overriding model.yaml anchors with anchors={anchors}')
|
118 |
+
self.yaml['anchors'] = round(anchors) # override yaml value
|
119 |
+
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
|
120 |
+
self.names = [str(i) for i in range(self.yaml['nc'])] # default names
|
121 |
+
self.inplace = self.yaml.get('inplace', True)
|
122 |
+
|
123 |
+
# Build strides, anchors
|
124 |
+
m = self.model[-1] # Detect()
|
125 |
+
if isinstance(m, Detect):
|
126 |
+
s = 256 # 2x min stride
|
127 |
+
m.inplace = self.inplace
|
128 |
+
m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
|
129 |
+
check_anchor_order(m) # must be in pixel-space (not grid-space)
|
130 |
+
m.anchors /= m.stride.view(-1, 1, 1)
|
131 |
+
self.stride = m.stride
|
132 |
+
self._initialize_biases() # only run once
|
133 |
+
|
134 |
+
# Init weights, biases
|
135 |
+
initialize_weights(self)
|
136 |
+
self.info()
|
137 |
+
LOGGER.info('')
|
138 |
+
|
139 |
+
def forward(self, x, augment=False, profile=False, visualize=False):
|
140 |
+
if augment:
|
141 |
+
return self._forward_augment(x) # augmented inference, None
|
142 |
+
return self._forward_once(x, profile, visualize) # single-scale inference, train
|
143 |
+
|
144 |
+
def _forward_augment(self, x):
|
145 |
+
img_size = x.shape[-2:] # height, width
|
146 |
+
s = [1, 0.83, 0.67] # scales
|
147 |
+
f = [None, 3, None] # flips (2-ud, 3-lr)
|
148 |
+
y = [] # outputs
|
149 |
+
for si, fi in zip(s, f):
|
150 |
+
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
|
151 |
+
yi = self._forward_once(xi)[0] # forward
|
152 |
+
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
|
153 |
+
yi = self._descale_pred(yi, fi, si, img_size)
|
154 |
+
y.append(yi)
|
155 |
+
y = self._clip_augmented(y) # clip augmented tails
|
156 |
+
return torch.cat(y, 1), None # augmented inference, train
|
157 |
+
|
158 |
+
def _forward_once(self, x, profile=False, visualize=False):
|
159 |
+
y, dt = [], [] # outputs
|
160 |
+
for m in self.model:
|
161 |
+
if m.f != -1: # if not from previous layer
|
162 |
+
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
|
163 |
+
if profile:
|
164 |
+
self._profile_one_layer(m, x, dt)
|
165 |
+
x = m(x) # run
|
166 |
+
y.append(x if m.i in self.save else None) # save output
|
167 |
+
if visualize:
|
168 |
+
feature_visualization(x, m.type, m.i, save_dir=visualize)
|
169 |
+
return x
|
170 |
+
|
171 |
+
def _descale_pred(self, p, flips, scale, img_size):
|
172 |
+
# de-scale predictions following augmented inference (inverse operation)
|
173 |
+
if self.inplace:
|
174 |
+
p[..., :4] /= scale # de-scale
|
175 |
+
if flips == 2:
|
176 |
+
p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
|
177 |
+
elif flips == 3:
|
178 |
+
p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
|
179 |
+
else:
|
180 |
+
x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
|
181 |
+
if flips == 2:
|
182 |
+
y = img_size[0] - y # de-flip ud
|
183 |
+
elif flips == 3:
|
184 |
+
x = img_size[1] - x # de-flip lr
|
185 |
+
p = torch.cat((x, y, wh, p[..., 4:]), -1)
|
186 |
+
return p
|
187 |
+
|
188 |
+
def _clip_augmented(self, y):
|
189 |
+
# Clip YOLOv5 augmented inference tails
|
190 |
+
nl = self.model[-1].nl # number of detection layers (P3-P5)
|
191 |
+
g = sum(4 ** x for x in range(nl)) # grid points
|
192 |
+
e = 1 # exclude layer count
|
193 |
+
i = (y[0].shape[1] // g) * sum(4 ** x for x in range(e)) # indices
|
194 |
+
y[0] = y[0][:, :-i] # large
|
195 |
+
i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
|
196 |
+
y[-1] = y[-1][:, i:] # small
|
197 |
+
return y
|
198 |
+
|
199 |
+
def _profile_one_layer(self, m, x, dt):
|
200 |
+
c = isinstance(m, Detect) # is final layer, copy input as inplace fix
|
201 |
+
o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPs
|
202 |
+
t = time_sync()
|
203 |
+
for _ in range(10):
|
204 |
+
m(x.copy() if c else x)
|
205 |
+
dt.append((time_sync() - t) * 100)
|
206 |
+
if m == self.model[0]:
|
207 |
+
LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
|
208 |
+
LOGGER.info(f'{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}')
|
209 |
+
if c:
|
210 |
+
LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
|
211 |
+
|
212 |
+
def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
|
213 |
+
# https://arxiv.org/abs/1708.02002 section 3.3
|
214 |
+
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
|
215 |
+
m = self.model[-1] # Detect() module
|
216 |
+
for mi, s in zip(m.m, m.stride): # from
|
217 |
+
b = mi.bias.view(m.na, -1).detach() # conv.bias(255) to (3,85)
|
218 |
+
b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
|
219 |
+
b[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum()) # cls
|
220 |
+
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
|
221 |
+
|
222 |
+
def _print_biases(self):
|
223 |
+
m = self.model[-1] # Detect() module
|
224 |
+
for mi in m.m: # from
|
225 |
+
b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85)
|
226 |
+
LOGGER.info(
|
227 |
+
('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
|
228 |
+
|
229 |
+
def _print_weights(self):
|
230 |
+
for m in self.model.modules():
|
231 |
+
if type(m) is Bottleneck:
|
232 |
+
LOGGER.info('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
|
233 |
+
|
234 |
+
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
|
235 |
+
# LOGGER.info('Fusing layers... ')
|
236 |
+
for m in self.model.modules():
|
237 |
+
if isinstance(m, (Conv, DWConv)) and hasattr(m, 'bn'):
|
238 |
+
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
|
239 |
+
delattr(m, 'bn') # remove batchnorm
|
240 |
+
m.forward = m.forward_fuse # update forward
|
241 |
+
# self.info()
|
242 |
+
return self
|
243 |
+
|
244 |
+
def info(self, verbose=False, img_size=640): # print model information
|
245 |
+
model_info(self, verbose, img_size)
|
246 |
+
|
247 |
+
def _apply(self, fn):
|
248 |
+
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
|
249 |
+
self = super()._apply(fn)
|
250 |
+
m = self.model[-1] # Detect()
|
251 |
+
if isinstance(m, Detect):
|
252 |
+
m.stride = fn(m.stride)
|
253 |
+
m.grid = list(map(fn, m.grid))
|
254 |
+
if isinstance(m.anchor_grid, list):
|
255 |
+
m.anchor_grid = list(map(fn, m.anchor_grid))
|
256 |
+
return self
|
257 |
+
|
258 |
+
Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
|
259 |
+
|
260 |
+
|
261 |
+
def parse_model(d, ch): # model_dict, input_channels(3)
|
262 |
+
LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
|
263 |
+
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
|
264 |
+
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
|
265 |
+
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
|
266 |
+
|
267 |
+
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
|
268 |
+
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
|
269 |
+
m = eval(m) if isinstance(m, str) else m # eval strings
|
270 |
+
for j, a in enumerate(args):
|
271 |
+
with contextlib.suppress(NameError):
|
272 |
+
args[j] = eval(a) if isinstance(a, str) else a # eval strings
|
273 |
+
|
274 |
+
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
|
275 |
+
if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
|
276 |
+
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x):
|
277 |
+
c1, c2 = ch[f], args[0]
|
278 |
+
if c2 != no: # if not output
|
279 |
+
c2 = make_divisible(c2 * gw, 8)
|
280 |
+
|
281 |
+
args = [c1, c2, *args[1:]]
|
282 |
+
if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]:
|
283 |
+
args.insert(2, n) # number of repeats
|
284 |
+
n = 1
|
285 |
+
elif m is nn.BatchNorm2d:
|
286 |
+
args = [ch[f]]
|
287 |
+
elif m is Concat:
|
288 |
+
c2 = sum(ch[x] for x in f)
|
289 |
+
elif m is Detect:
|
290 |
+
args.append([ch[x] for x in f])
|
291 |
+
if isinstance(args[1], int): # number of anchors
|
292 |
+
args[1] = [list(range(args[1] * 2))] * len(f)
|
293 |
+
elif m is Contract:
|
294 |
+
c2 = ch[f] * args[0] ** 2
|
295 |
+
elif m is Expand:
|
296 |
+
c2 = ch[f] // args[0] ** 2
|
297 |
+
else:
|
298 |
+
c2 = ch[f]
|
299 |
+
|
300 |
+
m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module
|
301 |
+
t = str(m)[8:-2].replace('__main__.', '') # module type
|
302 |
+
np = sum(x.numel() for x in m_.parameters()) # number params
|
303 |
+
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
|
304 |
+
LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}') # print
|
305 |
+
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
|
306 |
+
layers.append(m_)
|
307 |
+
if i == 0:
|
308 |
+
ch = []
|
309 |
+
ch.append(c2)
|
310 |
+
return nn.Sequential(*layers), sorted(save)
|
311 |
+
|
312 |
+
|
313 |
+
if __name__ == '__main__':
|
314 |
+
parser = argparse.ArgumentParser()
|
315 |
+
parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
|
316 |
+
parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs')
|
317 |
+
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
|
318 |
+
parser.add_argument('--profile', action='store_true', help='profile model speed')
|
319 |
+
parser.add_argument('--line-profile', action='store_true', help='profile model speed layer by layer')
|
320 |
+
parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
|
321 |
+
opt = parser.parse_args()
|
322 |
+
opt.cfg = check_yaml(opt.cfg) # check YAML
|
323 |
+
print_args(vars(opt))
|
324 |
+
device = select_device(opt.device)
|
325 |
+
|
326 |
+
# Create model
|
327 |
+
im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
|
328 |
+
model = Model(opt.cfg).to(device)
|
329 |
+
|
330 |
+
# Options
|
331 |
+
if opt.line_profile: # profile layer by layer
|
332 |
+
_ = model(im, profile=True)
|
333 |
+
|
334 |
+
elif opt.profile: # profile forward-backward
|
335 |
+
results = profile(input=im, ops=[model], n=3)
|
336 |
+
|
337 |
+
elif opt.test: # test all models
|
338 |
+
for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
|
339 |
+
try:
|
340 |
+
_ = Model(cfg)
|
341 |
+
except Exception as e:
|
342 |
+
print(f'Error in {cfg}: {e}')
|
343 |
+
|
344 |
+
else: # report fused model summary
|
345 |
+
model.fuse()
|
asone/detectors/yolov5/yolov5/utils/__init__.py
ADDED
File without changes
asone/detectors/yolov5/yolov5/utils/torch_utils.py
ADDED
@@ -0,0 +1,354 @@
1 |
+
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
|
2 |
+
"""
|
3 |
+
PyTorch utils
|
4 |
+
"""
|
5 |
+
|
6 |
+
import math
|
7 |
+
import os
|
8 |
+
import platform
|
9 |
+
import subprocess
|
10 |
+
import time
|
11 |
+
import warnings
|
12 |
+
from contextlib import contextmanager
|
13 |
+
from copy import deepcopy
|
14 |
+
from pathlib import Path
|
15 |
+
|
16 |
+
import torch
|
17 |
+
import torch.distributed as dist
|
18 |
+
import torch.nn as nn
|
19 |
+
import torch.nn.functional as F
|
20 |
+
from torch.nn.parallel import DistributedDataParallel as DDP
|
21 |
+
|
22 |
+
# from utils.general import LOGGER, check_version, colorstr, file_date, git_describe
|
23 |
+
|
24 |
+
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) # https://pytorch.org/docs/stable/elastic/run.html
|
25 |
+
RANK = int(os.getenv('RANK', -1))
|
26 |
+
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
|
27 |
+
|
28 |
+
try:
|
29 |
+
import thop # for FLOPs computation
|
30 |
+
except ImportError:
|
31 |
+
thop = None
|
32 |
+
|
33 |
+
# Suppress PyTorch warnings
|
34 |
+
warnings.filterwarnings('ignore', message='User provided device_type of \'cuda\', but CUDA is not available. Disabling')
|
35 |
+
|
36 |
+
|
37 |
+
def smart_DDP(model):
|
38 |
+
# Model DDP creation with checks
|
39 |
+
assert not check_version(torch.__version__, '1.12.0', pinned=True), \
|
40 |
+
'torch==1.12.0 torchvision==0.13.0 DDP training is not supported due to a known issue. ' \
|
41 |
+
'Please upgrade or downgrade torch to use DDP. See https://github.com/ultralytics/yolov5/issues/8395'
|
42 |
+
if check_version(torch.__version__, '1.11.0'):
|
43 |
+
return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK, static_graph=True)
|
44 |
+
else:
|
45 |
+
return DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
|
46 |
+
|
47 |
+
|
48 |
+
@contextmanager
|
49 |
+
def torch_distributed_zero_first(local_rank: int):
|
50 |
+
# Decorator to make all processes in distributed training wait for each local_master to do something
|
51 |
+
if local_rank not in [-1, 0]:
|
52 |
+
dist.barrier(device_ids=[local_rank])
|
53 |
+
yield
|
54 |
+
if local_rank == 0:
|
55 |
+
dist.barrier(device_ids=[0])
|
56 |
+
|
57 |
+
|
58 |
+
def device_count():
|
59 |
+
# Returns number of CUDA devices available. Safe version of torch.cuda.device_count(). Supports Linux and Windows
|
60 |
+
assert platform.system() in ('Linux', 'Windows'), 'device_count() only supported on Linux or Windows'
|
61 |
+
try:
|
62 |
+
cmd = 'nvidia-smi -L | wc -l' if platform.system() == 'Linux' else 'nvidia-smi -L | find /c /v ""' # Windows
|
63 |
+
return int(subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1])
|
64 |
+
except Exception:
|
65 |
+
return 0
|
66 |
+
|
67 |
+
|
68 |
+
def select_device(device='', batch_size=0, newline=True):
|
69 |
+
# device = None or 'cpu' or 0 or '0' or '0,1,2,3'
|
70 |
+
s = f'YOLOv5 🚀 {git_describe() or file_date()} Python-{platform.python_version()} torch-{torch.__version__} '
|
71 |
+
device = str(device).strip().lower().replace('cuda:', '').replace('none', '') # to string, 'cuda:0' to '0'
|
72 |
+
cpu = device == 'cpu'
|
73 |
+
mps = device == 'mps' # Apple Metal Performance Shaders (MPS)
|
74 |
+
if cpu or mps:
|
75 |
+
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False
|
76 |
+
elif device: # non-cpu device requested
|
77 |
+
os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable - must be before assert is_available()
|
78 |
+
assert torch.cuda.is_available() and torch.cuda.device_count() >= len(device.replace(',', '')), \
|
79 |
+
f"Invalid CUDA '--device {device}' requested, use '--device cpu' or pass valid CUDA device(s)"
|
80 |
+
|
81 |
+
if not (cpu or mps) and torch.cuda.is_available(): # prefer GPU if available
|
82 |
+
devices = device.split(',') if device else '0' # range(torch.cuda.device_count()) # i.e. 0,1,6,7
|
83 |
+
n = len(devices) # device count
|
84 |
+
if n > 1 and batch_size > 0: # check batch_size is divisible by device_count
|
85 |
+
assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
|
86 |
+
space = ' ' * (len(s) + 1)
|
87 |
+
for i, d in enumerate(devices):
|
88 |
+
p = torch.cuda.get_device_properties(i)
|
89 |
+
s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB
|
90 |
+
arg = 'cuda:0'
|
91 |
+
elif mps and getattr(torch, 'has_mps', False) and torch.backends.mps.is_available(): # prefer MPS if available
|
92 |
+
s += 'MPS\n'
|
93 |
+
arg = 'mps'
|
94 |
+
else: # revert to CPU
|
95 |
+
s += 'CPU\n'
|
96 |
+
arg = 'cpu'
|
97 |
+
|
98 |
+
if not newline:
|
99 |
+
s = s.rstrip()
|
100 |
+
LOGGER.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe
|
101 |
+
return torch.device(arg)
|
102 |
+
|
103 |
+
|
104 |
+
def time_sync():
|
105 |
+
# PyTorch-accurate time
|
106 |
+
if torch.cuda.is_available():
|
107 |
+
torch.cuda.synchronize()
|
108 |
+
return time.time()
|
109 |
+
|
110 |
+
|
111 |
+
def profile(input, ops, n=10, device=None):
|
112 |
+
results = []
|
113 |
+
if not isinstance(device, torch.device):
|
114 |
+
device = select_device(device)
|
115 |
+
print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
|
116 |
+
f"{'input':>24s}{'output':>24s}")
|
117 |
+
|
118 |
+
for x in input if isinstance(input, list) else [input]:
|
119 |
+
x = x.to(device)
|
120 |
+
x.requires_grad = True
|
121 |
+
for m in ops if isinstance(ops, list) else [ops]:
|
122 |
+
m = m.to(device) if hasattr(m, 'to') else m # device
|
123 |
+
m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
|
124 |
+
tf, tb, t = 0, 0, [0, 0, 0] # dt forward, backward
|
125 |
+
try:
|
126 |
+
flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPs
|
127 |
+
except Exception:
|
128 |
+
flops = 0
|
129 |
+
|
130 |
+
try:
|
131 |
+
for _ in range(n):
|
132 |
+
t[0] = time_sync()
|
133 |
+
y = m(x)
|
134 |
+
t[1] = time_sync()
|
135 |
+
try:
|
136 |
+
_ = (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
|
137 |
+
t[2] = time_sync()
|
138 |
+
except Exception: # no backward method
|
139 |
+
# print(e) # for debug
|
140 |
+
t[2] = float('nan')
|
141 |
+
tf += (t[1] - t[0]) * 1000 / n # ms per op forward
|
142 |
+
tb += (t[2] - t[1]) * 1000 / n # ms per op backward
|
143 |
+
mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0 # (GB)
|
144 |
+
s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' for x in (x, y)) # shapes
|
145 |
+
p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0 # parameters
|
146 |
+
print(f'{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}')
|
147 |
+
results.append([p, flops, mem, tf, tb, s_in, s_out])
|
148 |
+
except Exception as e:
|
149 |
+
print(e)
|
150 |
+
results.append(None)
|
151 |
+
torch.cuda.empty_cache()
|
152 |
+
return results
|
153 |
+
|
154 |
+
|
155 |
+
def is_parallel(model):
|
156 |
+
# Returns True if model is of type DP or DDP
|
157 |
+
return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
|
158 |
+
|
159 |
+
|
160 |
+
def de_parallel(model):
|
161 |
+
# De-parallelize a model: returns single-GPU model if model is of type DP or DDP
|
162 |
+
return model.module if is_parallel(model) else model
|
163 |
+
|
164 |
+
|
165 |
+
def initialize_weights(model):
|
166 |
+
for m in model.modules():
|
167 |
+
t = type(m)
|
168 |
+
if t is nn.Conv2d:
|
169 |
+
pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
|
170 |
+
elif t is nn.BatchNorm2d:
|
171 |
+
m.eps = 1e-3
|
172 |
+
m.momentum = 0.03
|
173 |
+
elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
|
174 |
+
m.inplace = True
|
175 |
+
|
176 |
+
|
177 |
+
def find_modules(model, mclass=nn.Conv2d):
|
178 |
+
# Finds layer indices matching module class 'mclass'
|
179 |
+
return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
|
180 |
+
|
181 |
+
|
182 |
+
def sparsity(model):
|
183 |
+
# Return global model sparsity
|
184 |
+
a, b = 0, 0
|
185 |
+
for p in model.parameters():
|
186 |
+
a += p.numel()
|
187 |
+
b += (p == 0).sum()
|
188 |
+
return b / a
|
189 |
+
|
190 |
+
|
191 |
+
def prune(model, amount=0.3):
|
192 |
+
# Prune model to requested global sparsity
|
193 |
+
import torch.nn.utils.prune as prune
|
194 |
+
print('Pruning model... ', end='')
|
195 |
+
for name, m in model.named_modules():
|
196 |
+
if isinstance(m, nn.Conv2d):
|
197 |
+
prune.l1_unstructured(m, name='weight', amount=amount) # prune
|
198 |
+
prune.remove(m, 'weight') # make permanent
|
199 |
+
print(' %.3g global sparsity' % sparsity(model))
|
200 |
+
|
201 |
+
|
202 |
+
def fuse_conv_and_bn(conv, bn):
|
203 |
+
# Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
|
204 |
+
fusedconv = nn.Conv2d(conv.in_channels,
|
205 |
+
conv.out_channels,
|
206 |
+
kernel_size=conv.kernel_size,
|
207 |
+
stride=conv.stride,
|
208 |
+
padding=conv.padding,
|
209 |
+
groups=conv.groups,
|
210 |
+
bias=True).requires_grad_(False).to(conv.weight.device)
|
211 |
+
|
212 |
+
# Prepare filters
|
213 |
+
w_conv = conv.weight.clone().view(conv.out_channels, -1)
|
214 |
+
w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
|
215 |
+
fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
|
216 |
+
|
217 |
+
# Prepare spatial bias
|
218 |
+
b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
|
219 |
+
b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
|
220 |
+
fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
|
221 |
+
|
222 |
+
return fusedconv
|
223 |
+
|
224 |
+
|
225 |
+
def model_info(model, verbose=False, img_size=640):
|
226 |
+
# Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
|
227 |
+
n_p = sum(x.numel() for x in model.parameters()) # number parameters
|
228 |
+
n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
|
229 |
+
if verbose:
|
230 |
+
print(f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}")
|
231 |
+
for i, (name, p) in enumerate(model.named_parameters()):
|
232 |
+
name = name.replace('module_list.', '')
|
233 |
+
print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
|
234 |
+
(i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
|
235 |
+
|
236 |
+
try: # FLOPs
|
237 |
+
from thop import profile
|
238 |
+
stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32
|
239 |
+
img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input
|
240 |
+
flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPs
|
241 |
+
img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float
|
242 |
+
fs = ', %.1f GFLOPs' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPs
|
243 |
+
except Exception:
|
244 |
+
fs = ''
|
245 |
+
|
246 |
+
name = Path(model.yaml_file).stem.replace('yolov5', 'YOLOv5') if hasattr(model, 'yaml_file') else 'Model'
|
247 |
+
# LOGGER.info(f"{name} summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
|
248 |
+
|
249 |
+
|
250 |
+
def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416)
|
251 |
+
# Scales img(bs,3,y,x) by ratio constrained to gs-multiple
|
252 |
+
if ratio == 1.0:
|
253 |
+
return img
|
254 |
+
h, w = img.shape[2:]
|
255 |
+
s = (int(h * ratio), int(w * ratio)) # new size
|
256 |
+
img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize
|
257 |
+
if not same_shape: # pad/crop img
|
258 |
+
h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w))
|
259 |
+
return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean
|
260 |
+
|
261 |
+
|
262 |
+
def copy_attr(a, b, include=(), exclude=()):
|
263 |
+
# Copy attributes from b to a, options to only include [...] and to exclude [...]
|
264 |
+
for k, v in b.__dict__.items():
|
265 |
+
if (len(include) and k not in include) or k.startswith('_') or k in exclude:
|
266 |
+
continue
|
267 |
+
else:
|
268 |
+
setattr(a, k, v)
|
269 |
+
|
270 |
+
|
271 |
+
def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, weight_decay=1e-5):
|
272 |
+
# YOLOv5 3-param group optimizer: 0) weights with decay, 1) weights no decay, 2) biases no decay
|
273 |
+
g = [], [], [] # optimizer parameter groups
|
274 |
+
bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k) # normalization layers, i.e. BatchNorm2d()
|
275 |
+
for v in model.modules():
|
276 |
+
if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): # bias (no decay)
|
277 |
+
g[2].append(v.bias)
|
278 |
+
if isinstance(v, bn): # weight (no decay)
|
279 |
+
g[1].append(v.weight)
|
280 |
+
elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): # weight (with decay)
|
281 |
+
g[0].append(v.weight)
|
282 |
+
|
283 |
+
if name == 'Adam':
|
284 |
+
optimizer = torch.optim.Adam(g[2], lr=lr, betas=(momentum, 0.999)) # adjust beta1 to momentum
|
285 |
+
elif name == 'AdamW':
|
286 |
+
optimizer = torch.optim.AdamW(g[2], lr=lr, betas=(momentum, 0.999), weight_decay=0.0)
|
287 |
+
elif name == 'RMSProp':
|
288 |
+
optimizer = torch.optim.RMSprop(g[2], lr=lr, momentum=momentum)
|
289 |
+
elif name == 'SGD':
|
290 |
+
optimizer = torch.optim.SGD(g[2], lr=lr, momentum=momentum, nesterov=True)
|
291 |
+
else:
|
292 |
+
raise NotImplementedError(f'Optimizer {name} not implemented.')
|
293 |
+
|
294 |
+
optimizer.add_param_group({'params': g[0], 'weight_decay': weight_decay}) # add g0 with weight_decay
|
295 |
+
optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0}) # add g1 (BatchNorm2d weights)
|
296 |
+
LOGGER.info(f"{colorstr('optimizer:')} {type(optimizer).__name__} with parameter groups "
|
297 |
+
f"{len(g[1])} weight (no decay), {len(g[0])} weight, {len(g[2])} bias")
|
298 |
+
return optimizer
|
299 |
+
|
300 |
+
|
301 |
+
class EarlyStopping:
|
302 |
+
# YOLOv5 simple early stopper
|
303 |
+
def __init__(self, patience=30):
|
304 |
+
self.best_fitness = 0.0 # i.e. mAP
|
305 |
+
self.best_epoch = 0
|
306 |
+
self.patience = patience or float('inf') # epochs to wait after fitness stops improving to stop
|
307 |
+
self.possible_stop = False # possible stop may occur next epoch
|
308 |
+
|
309 |
+
def __call__(self, epoch, fitness):
|
310 |
+
if fitness >= self.best_fitness: # >= 0 to allow for early zero-fitness stage of training
|
311 |
+
self.best_epoch = epoch
|
312 |
+
self.best_fitness = fitness
|
313 |
+
delta = epoch - self.best_epoch # epochs without improvement
|
314 |
+
self.possible_stop = delta >= (self.patience - 1) # possible stop may occur next epoch
|
315 |
+
stop = delta >= self.patience # stop training if patience exceeded
|
316 |
+
if stop:
|
317 |
+
LOGGER.info(f'Stopping training early as no improvement observed in last {self.patience} epochs. '
|
318 |
+
f'Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n'
|
319 |
+
f'To update EarlyStopping(patience={self.patience}) pass a new patience value, '
|
320 |
+
f'i.e. `python train.py --patience 300` or use `--patience 0` to disable EarlyStopping.')
|
321 |
+
return stop
|
322 |
+
|
323 |
+
|
324 |
+
class ModelEMA:
|
325 |
+
""" Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models
|
326 |
+
Keeps a moving average of everything in the model state_dict (parameters and buffers)
|
327 |
+
For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
|
328 |
+
"""
|
329 |
+
|
330 |
+
def __init__(self, model, decay=0.9999, tau=2000, updates=0):
|
331 |
+
# Create EMA
|
332 |
+
self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA
|
333 |
+
# if next(model.parameters()).device.type != 'cpu':
|
334 |
+
# self.ema.half() # FP16 EMA
|
335 |
+
self.updates = updates # number of EMA updates
|
336 |
+
self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs)
|
337 |
+
for p in self.ema.parameters():
|
338 |
+
p.requires_grad_(False)
|
339 |
+
|
340 |
+
def update(self, model):
|
341 |
+
# Update EMA parameters
|
342 |
+
with torch.no_grad():
|
343 |
+
self.updates += 1
|
344 |
+
d = self.decay(self.updates)
|
345 |
+
|
346 |
+
msd = de_parallel(model).state_dict() # model state_dict
|
347 |
+
for k, v in self.ema.state_dict().items():
|
348 |
+
if v.dtype.is_floating_point:
|
349 |
+
v *= d
|
350 |
+
v += (1 - d) * msd[k].detach()
|
351 |
+
|
352 |
+
def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
|
353 |
+
# Update EMA attributes
|
354 |
+
copy_attr(self.ema, model, include, exclude)
|
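As a hedged sanity check for the fuse_conv_and_bn helper added above: the fused convolution should reproduce a Conv2d + BatchNorm2d pair in eval mode. The layer sizes and tolerance below are illustrative assumptions, not values from this commit.

import torch
import torch.nn as nn
from asone.detectors.yolov5.yolov5.utils.torch_utils import fuse_conv_and_bn

conv = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(16).eval()        # eval mode so running statistics are used, as at inference time
fused = fuse_conv_and_bn(conv, bn)

x = torch.randn(1, 3, 32, 32)
with torch.no_grad():
    print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))  # should print True up to floating-point tolerance
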
asone/detectors/yolov5/yolov5/utils/yolov5_utils.py
ADDED
@@ -0,0 +1,222 @@
import contextlib
import time
import numpy as np
import torch
import torchvision
import cv2
import sys
from pathlib import Path


def box_area(box):
    # box = xyxy(4,n)
    return (box[2] - box[0]) * (box[3] - box[1])


def box_iou(box1, box2, eps=1e-7):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1)
    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)

    # IoU = inter / (area1 + area2 - inter)
    return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter + eps)


def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def non_max_suppression(prediction,
                        conf_thres=0.25,
                        iou_thres=0.45,
                        classes=None,
                        agnostic=False,
                        multi_label=False,
                        labels=(),
                        max_det=300):
    """Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes
    Returns:
         list of detections, on (n,6) tensor per image [xyxy, conf, cls]
    """
    # prediction = torch.Tensor(prediction)
    bs = prediction.shape[0]  # batch size
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates
    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    max_wh = 7680  # (pixels) maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 0.3 + 0.03 * bs  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        # print(type(x))
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            # LOGGER.warning(f'WARNING: NMS time limit {time_limit:.3f}s exceeded')
            break  # time limit exceeded

    return output


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def clip_coords(boxes, shape):
    # Clip bounding xyxy bounding boxes to image shape (height, width)
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[:, 0].clamp_(0, shape[1])  # x1
        boxes[:, 1].clamp_(0, shape[0])  # y1
        boxes[:, 2].clamp_(0, shape[1])  # x2
        boxes[:, 3].clamp_(0, shape[0])  # y2
    else:  # np.array (faster grouped)
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2


@contextlib.contextmanager
def yolov5_in_syspath():
    """
    Temporarily add yolov5 folder to `sys.path`.

    torch.hub handles it in the same way: https://github.com/pytorch/pytorch/blob/75024e228ca441290b6a1c2e564300ad507d7af6/torch/hub.py#L387

    Proper fix for: #22, #134, #353, #1155, #1389, #1680, #2531, #3071
    No need for such workarounds: #869, #1052, #2949
    """
    yolov5_folder_dir = str(Path(__file__).parents[1].absolute())
    try:
        sys.path.insert(0, yolov5_folder_dir)
        yield
    finally:
        sys.path.remove(yolov5_folder_dir)
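A minimal sketch of how these helpers compose into the usual YOLOv5 pre/post-processing flow: letterbox the frame, run the model, apply NMS, then map boxes back to the original image. The random tensor below stands in for a real forward pass, so the decoded boxes are meaningless; only the plumbing between the helpers is illustrated.

import numpy as np
import torch

from asone.detectors.yolov5.yolov5.utils.yolov5_utils import (letterbox,
                                                              non_max_suppression,
                                                              scale_coords)

img0 = np.zeros((720, 1280, 3), dtype=np.uint8)           # stand-in for a BGR video frame
img, ratio, pad = letterbox(img0, new_shape=(640, 640), auto=False)

x = img.transpose((2, 0, 1))[::-1]                         # HWC BGR -> CHW RGB
x = np.ascontiguousarray(x, dtype=np.float32) / 255.0
x = torch.from_numpy(x)[None]                              # (1, 3, 640, 640)

raw_pred = torch.rand(1, 25200, 85)                        # placeholder for model(x)[0]: [xywh, obj, 80 class scores]
dets = non_max_suppression(raw_pred, conf_thres=0.25, iou_thres=0.45)[0]   # (n, 6): xyxy, conf, cls

if len(dets):
    # map boxes from the 640x640 letterboxed space back to the original frame size
    dets[:, :4] = scale_coords(x.shape[2:], dets[:, :4], img0.shape).round()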
asone/detectors/yolov5/yolov5_detector.py
ADDED
@@ -0,0 +1,121 @@
import os
from asone.utils import get_names
import numpy as np
import warnings
import torch
import onnxruntime

from asone.detectors.yolov5.yolov5.utils.yolov5_utils import (non_max_suppression,
                                                              scale_coords,
                                                              letterbox)
from asone.detectors.yolov5.yolov5.models.experimental import attempt_load
from asone import utils


class YOLOv5Detector:
    def __init__(self,
                 weights=None,
                 use_onnx=False,
                 use_cuda=True):

        self.use_onnx = use_onnx
        self.device = 'cuda' if use_cuda else 'cpu'

        if not os.path.exists(weights):
            utils.download_weights(weights)

        # Load Model
        self.model = self.load_model(use_cuda, weights)

    def load_model(self, use_cuda, weights, fp16=False):
        # Device: CUDA and if fp16=True only then half precision floating point works
        self.fp16 = fp16 & ((not self.use_onnx or self.use_onnx) and self.device != 'cpu')
        # Load onnx
        if self.use_onnx:
            if use_cuda:
                providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
            else:
                providers = ['CPUExecutionProvider']
            model = onnxruntime.InferenceSession(weights, providers=providers)
        # Load Pytorch
        else:
            model = attempt_load(weights, device=self.device, inplace=True, fuse=True)
            model.half() if self.fp16 else model.float()
        return model

    def image_preprocessing(self,
                            image: list,
                            input_shape=(640, 640)) -> list:

        original_image = image.copy()
        image = letterbox(image, input_shape, stride=32, auto=False)[0]
        image = image.transpose((2, 0, 1))[::-1]
        image = np.ascontiguousarray(image, dtype=np.float32)
        image /= 255  # 0 - 255 to 0.0 - 1.0
        if len(image.shape) == 3:
            image = image[None]  # expand for batch dim
        return original_image, image

    def detect(self, image: list,
               input_shape: tuple = (640, 640),
               conf_thres: float = 0.25,
               iou_thres: float = 0.45,
               max_det: int = 1000,
               filter_classes: bool = None,
               agnostic_nms: bool = True,
               with_p6: bool = False) -> list:

        # Image Preprocessing
        original_image, processed_image = self.image_preprocessing(image, input_shape)

        # Inference
        if self.use_onnx:
            # Input names of ONNX model on which it is exported
            input_name = self.model.get_inputs()[0].name
            # Run onnx model
            pred = self.model.run([self.model.get_outputs()[0].name], {input_name: processed_image})[0]
        # Run Pytorch model
        else:
            processed_image = torch.from_numpy(processed_image).to(self.device)
            # Change image floating point precision if fp16 set to true
            processed_image = processed_image.half() if self.fp16 else processed_image.float()
            pred = self.model(processed_image, augment=False, visualize=False)[0]

        # Post Processing
        if isinstance(pred, np.ndarray):
            pred = torch.tensor(pred, device=self.device)
        predictions = non_max_suppression(pred, conf_thres,
                                          iou_thres,
                                          agnostic=agnostic_nms,
                                          max_det=max_det)

        for i, prediction in enumerate(predictions):  # per image
            if len(prediction):
                prediction[:, :4] = scale_coords(
                    processed_image.shape[2:], prediction[:, :4], original_image.shape).round()
                predictions[i] = prediction
        detections = predictions[0].cpu().numpy()
        image_info = {
            'width': original_image.shape[1],
            'height': original_image.shape[0],
        }

        self.boxes = detections[:, :4]
        self.scores = detections[:, 4:5]
        self.class_ids = detections[:, 5:6]

        if filter_classes:
            class_names = get_names()

            filter_class_idx = []
            if filter_classes:
                for _class in filter_classes:
                    if _class.lower() in class_names:
                        filter_class_idx.append(class_names.index(_class.lower()))
                    else:
                        warnings.warn(f"class {_class} not found in model classes list.")

            detections = detections[np.in1d(detections[:, 5].astype(int), filter_class_idx)]

        return detections, image_info
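A short usage sketch for YOLOv5Detector, assuming the package's yolov5 __init__ re-exports the class (as the yolov6 __init__ below does for YOLOv6Detector) and that the named checkpoint is resolvable by asone's weight downloader; the weight file name and input image path are placeholders. detect() returns an (n, 6) array of [x1, y1, x2, y2, conf, class_id] plus an image-info dict.

import cv2
from asone.detectors.yolov5 import YOLOv5Detector

detector = YOLOv5Detector(weights='yolov5s.pt', use_onnx=False, use_cuda=False)

frame = cv2.imread('sample_videos/frame.jpg')              # hypothetical BGR test image
dets, info = detector.detect(frame,
                             conf_thres=0.25,
                             iou_thres=0.45,
                             filter_classes=['person', 'car'])   # optional class-name filter

for x1, y1, x2, y2, conf, cls_id in dets:                  # each row: x1, y1, x2, y2, confidence, class id
    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)

print(info)                                                # {'width': <original width>, 'height': <original height>}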
asone/detectors/yolov6/__init__.py
ADDED
@@ -0,0 +1,2 @@
from .yolov6_detector import YOLOv6Detector
__all__ = ['YOLOv6Detector']
asone/detectors/yolov6/yolov6/__init__.py
ADDED
File without changes
asone/detectors/yolov6/yolov6/assigners/__init__.py
ADDED
@@ -0,0 +1,2 @@
from .atss_assigner import ATSSAssigner
from .tal_assigner import TaskAlignedAssigner