sft2005 committed on
Commit 9545fea · verified · 1 Parent(s): 07c776b

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50)
  1. .gitattributes +10 -0
  2. .gitignore +12 -0
  3. LICENSE +21 -0
  4. README _CH.md +319 -0
  5. README.md +347 -0
  6. export_onnx.py +200 -0
  7. hubconf.py +30 -0
  8. lib/__init__.py +0 -0
  9. lib/__pycache__/__init__.cpython-310.pyc +0 -0
  10. lib/config/__init__.py +2 -0
  11. lib/config/__pycache__/__init__.cpython-310.pyc +0 -0
  12. lib/config/__pycache__/__init__.cpython-37.pyc +0 -0
  13. lib/config/__pycache__/default.cpython-310.pyc +0 -0
  14. lib/config/__pycache__/default.cpython-37.pyc +0 -0
  15. lib/config/default.py +165 -0
  16. lib/config/yolov11.py +30 -0
  17. lib/core/__init__.py +1 -0
  18. lib/core/__pycache__/__init__.cpython-310.pyc +0 -0
  19. lib/core/__pycache__/__init__.cpython-37.pyc +0 -0
  20. lib/core/__pycache__/evaluate.cpython-310.pyc +0 -0
  21. lib/core/__pycache__/evaluate.cpython-37.pyc +0 -0
  22. lib/core/__pycache__/function.cpython-310.pyc +0 -0
  23. lib/core/__pycache__/function.cpython-37.pyc +0 -0
  24. lib/core/__pycache__/general.cpython-310.pyc +0 -0
  25. lib/core/__pycache__/general.cpython-37.pyc +0 -0
  26. lib/core/__pycache__/loss.cpython-310.pyc +0 -0
  27. lib/core/__pycache__/postprocess.cpython-310.pyc +0 -0
  28. lib/core/__pycache__/postprocess.cpython-37.pyc +0 -0
  29. lib/core/activations.py +72 -0
  30. lib/core/evaluate.py +278 -0
  31. lib/core/function.py +510 -0
  32. lib/core/general.py +466 -0
  33. lib/core/loss.py +249 -0
  34. lib/core/postprocess.py +225 -0
  35. lib/dataset/AutoDriveDataset.py +264 -0
  36. lib/dataset/DemoDataset.py +188 -0
  37. lib/dataset/__init__.py +3 -0
  38. lib/dataset/__pycache__/AutoDriveDataset.cpython-310.pyc +0 -0
  39. lib/dataset/__pycache__/AutoDriveDataset.cpython-37.pyc +0 -0
  40. lib/dataset/__pycache__/DemoDataset.cpython-310.pyc +0 -0
  41. lib/dataset/__pycache__/DemoDataset.cpython-37.pyc +0 -0
  42. lib/dataset/__pycache__/__init__.cpython-310.pyc +0 -0
  43. lib/dataset/__pycache__/__init__.cpython-37.pyc +0 -0
  44. lib/dataset/__pycache__/bdd.cpython-310.pyc +0 -0
  45. lib/dataset/__pycache__/bdd.cpython-37.pyc +0 -0
  46. lib/dataset/__pycache__/convert.cpython-310.pyc +0 -0
  47. lib/dataset/__pycache__/convert.cpython-37.pyc +0 -0
  48. lib/dataset/bdd.py +85 -0
  49. lib/dataset/convert.py +31 -0
  50. lib/dataset/hust.py +87 -0
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ pictures/da.png filter=lfs diff=lfs merge=lfs -text
+ pictures/detect.png filter=lfs diff=lfs merge=lfs -text
+ pictures/detect_onnx.jpg filter=lfs diff=lfs merge=lfs -text
+ pictures/input1.gif filter=lfs diff=lfs merge=lfs -text
+ pictures/input2.gif filter=lfs diff=lfs merge=lfs -text
+ pictures/ll.png filter=lfs diff=lfs merge=lfs -text
+ pictures/output1.gif filter=lfs diff=lfs merge=lfs -text
+ pictures/output2.gif filter=lfs diff=lfs merge=lfs -text
+ pictures/output_onnx.jpg filter=lfs diff=lfs merge=lfs -text
+ pictures/yolop.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,12 @@
+ .DS_Store
+ __pycache__/
+ .idea/
+ .tmp/
+ .vscode/
+ bdd/
+ runs/
+ inference/
+ *.pth
+ *.pt
+ *.tar
+ *.tar.gz
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2021 Hust Visual Learning Team
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README _CH.md ADDED
@@ -0,0 +1,319 @@
1
+ <div align="left">
2
+
3
+ ## You Only :eyes: Once for Panoptic ​ :car: Perception
4
+ > [**You Only Look Once for Panoptic Driving Perception**](https://arxiv.org/abs/2108.11250)
5
+ >
6
+ > by Dong Wu, Manwen Liao, Weitian Zhang, [Xinggang Wang](https://xinggangw.info/)<sup> :email:</sup> [*School of EIC, HUST*](http://eic.hust.edu.cn/English/Home.htm)
7
+ >
8
+ > (<sup>:email:</sup>) corresponding author.
9
+ >
10
+ > *arXiv technical report ([arXiv 2108.11250](https://arxiv.org/abs/2108.11250))*
11
+
12
+ ---
13
+
14
+ [English Document](https://github.com/hustvl/YOLOP)
15
+
16
+ ### YOLOP框架
17
+
18
+ ![yolop](pictures/yolop.png)
19
+
20
+ ### 贡献
21
+
22
+ * 我们提出了一种高效的多任务网络,该网络可以联合处理自动驾驶中的目标检测、可驾驶区域分割和车道检测三个关键任务。不但节省了计算成本,减少了推理时间,还提高了各个任务的性能。我们的工作是第一个在嵌入式设备上实现实时,同时在`BDD100K`数据集上保持`SOTA`(最先进的)性能水平。
23
+
24
+ * 我们设计了消融实验来验证我们的多任务方案的有效性。证明了这三个任务不需要繁琐的交替优化就可以联合学习。
25
+
26
+ * 我们设计了消融实验证明了基于网格的检测任务的预测机制与语义分割任务的预测机制更相关,相信这将为其他相关的多任务学习研究工作提供参考。
27
+
28
+
29
+ ### 实验结果
30
+
31
+ #### 交通目标检测结果:
32
+
33
+ | Model | Recall(%) | mAP50(%) | Speed(fps) |
34
+ | -------------- | --------- | -------- | ---------- |
35
+ | `Multinet` | 81.3 | 60.2 | 8.6 |
36
+ | `DLT-Net` | 89.4 | 68.4 | 9.3 |
37
+ | `Faster R-CNN` | 77.2 | 55.6 | 5.3 |
38
+ | `YOLOv5s` | 86.8 | 77.2 | 82 |
39
+ | `YOLOP(ours)` | 89.2 | 76.5 | 41 |
40
+ #### 可行驶区域分割结果:
41
+
42
+ | Model | mIOU(%) | Speed(fps) |
43
+ | ------------- | ------- | ---------- |
44
+ | `Multinet` | 71.6 | 8.6 |
45
+ | `DLT-Net` | 71.3 | 9.3 |
46
+ | `PSPNet` | 89.6 | 11.1 |
47
+ | `YOLOP(ours)` | 91.5 | 41 |
48
+
49
+ #### 车道线检测结果:
50
+
51
+ | Model | mIOU(%) | IOU(%) |
52
+ | ------------- | ------- | ------ |
53
+ | `ENet` | 34.12 | 14.64 |
54
+ | `SCNN` | 35.79 | 15.84 |
55
+ | `ENet-SAD` | 36.56 | 16.02 |
56
+ | `YOLOP(ours)` | 70.50 | 26.20 |
57
+
58
+ #### 消融实验 1: 端对端训练 v.s. 分步训练:
59
+
60
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) |
61
+ | --------------- | --------- | ----- | ------- | ----------- | ------ |
62
+ | `ES-W` | 87.0 | 75.3 | 90.4 | 66.8 | 26.2 |
63
+ | `ED-W` | 87.3 | 76.0 | 91.6 | 71.2 | 26.1 |
64
+ | `ES-D-W` | 87.0 | 75.1 | 91.7 | 68.6 | 27.0 |
65
+ | `ED-S-W` | 87.5 | 76.1 | 91.6 | 68.0 | 26.8 |
66
+ | `End-to-end` | 89.2 | 76.5 | 91.5 | 70.5 | 26.2 |
67
+
68
+ #### 消融实验 2: 多任务学习 v.s. 单任务学习:
69
+
70
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) | Speed(ms/frame) |
71
+ | --------------- | --------- | ----- | ------- | ----------- | ------ | --------------- |
72
+ | `Det(only)` | 88.2 | 76.9 | - | - | - | 15.7 |
73
+ | `Da-Seg(only)` | - | - | 92.0 | - | - | 14.8 |
74
+ | `Ll-Seg(only)` | - | - | - | 79.6 | 27.9 | 14.8 |
75
+ | `Multitask` | 89.2 | 76.5 | 91.5 | 70.5 | 26.2 | 24.4 |
76
+
77
+ #### 消融实验 3: 基于网格 v.s. 基于区域:
78
+
79
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) | Speed(ms/frame) |
80
+ | --------------- | --------- | ----- | ------- | ----------- | ------ | --------------- |
81
+ | `R-CNNP Det(only)` | 79.0 | 67.3 | - | - | - | - |
82
+ | `R-CNNP Seg(only)` | - | - | 90.2 | 59.5 | 24.0 | - |
83
+ | `R-CNNP Multitask` | 77.2(-1.8)| 62.6(-4.7)| 86.8(-3.4)| 49.8(-9.7)| 21.5(-2.5)| 103.3 |
84
+ | `YOLOP Det(only)` | 88.2 | 76.9 | - | - | - | - |
85
+ | `YOLOP Seg(only)` | - | - | 91.6 | 69.9 | 26.5 | - |
86
+ | `YOLOP Multitask` | 89.2(+1.0)| 76.5(-0.4)| 91.5(-0.1)| 70.5(+0.6)| 26.2(-0.3)| 24.4 |
87
+
88
+ **Notes**:
89
+
90
+ - 我们工作参考了以下工作: `Multinet` ([论文](https://arxiv.org/pdf/1612.07695.pdf?utm_campaign=affiliate-ir-Optimise%20media%28%20South%20East%20Asia%29%20Pte.%20ltd._156_-99_national_R_all_ACQ_cpa_en&utm_content=&utm_source=%20388939),[代码](https://github.com/MarvinTeichmann/MultiNet)),`DLT-Net` ([论文](https://ieeexplore.ieee.org/abstract/document/8937825)),`Faster R-CNN` ([论文](https://proceedings.neurips.cc/paper/2015/file/14bfa6bb14875e45bba028a21ed38046-Paper.pdf),[代码](https://github.com/ShaoqingRen/faster_rcnn)),`YOLOv5`([代码](https://github.com/ultralytics/yolov5)) ,`PSPNet`([论文](https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf),[代码](https://github.com/hszhao/PSPNet)) ,`ENet`([论文](https://arxiv.org/pdf/1606.02147.pdf),[代码](https://github.com/osmr/imgclsmob)) `SCNN`([论文](https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/download/16802/16322),[代码](https://github.com/XingangPan/SCNN)) `SAD-ENet`([论文](https://openaccess.thecvf.com/content_ICCV_2019/papers/Hou_Learning_Lightweight_Lane_Detection_CNNs_by_Self_Attention_Distillation_ICCV_2019_paper.pdf),[代码](https://github.com/cardwing/Codes-for-Lane-Detection)). 感谢他们精彩的工作
91
+ - 在表 4中, E, D, S 和 W 分别代表 编码器(Encoder), 检测头(Detect head), 两个分割头(Segment heads)和整个网络(whole network). 所以算法 (首先,我们只训练编码器和检测头。然后我们冻结编码器和检测头只训练两个分割头。最后,整个网络进行联合训练三个任务) 可以被记作 `ED-S-W`,以此类推。
92
+
93
+ ---
94
+
95
+ ### 可视化
96
+
97
+ #### 交通目标检测结果
98
+
99
+ ![detect result](pictures/detect.png)
100
+
101
+ #### 可行驶区域分割结果
102
+
103
+ ![](pictures/da.png)
104
+
105
+ #### 车道线分割结果
106
+
107
+ ![](pictures/ll.png)
108
+
109
+ **注意点**:
110
+
111
+ - 车道线分割结果是经过曲线拟合的.
112
+
113
+ ---
114
+
115
+ ### Project Structure
116
+
117
+ ```python
118
+ ├─inference
119
+ │ ├─images # inference images
120
+ │ ├─output # inference result
121
+ ├─lib
122
+ │ ├─config/default # configuration of training and validation
123
+ │ ├─core
124
+ │ │ ├─activations.py # activation function
125
+ │ │ ├─evaluate.py # calculation of metric
126
+ │ │ ├─function.py # training and validation of model
127
+ │ │ ├─general.py #calculation of metric、nms、conversion of data-format、visualization
128
+ │ │ ├─loss.py # loss function
129
+ │ │ ├─postprocess.py # postprocess(refine da-seg and ll-seg, unrelated to paper)
130
+ │ ├─dataset
131
+ │ │ ├─AutoDriveDataset.py # Superclass dataset,general function
132
+ │ │ ├─bdd.py # Subclass dataset,specific function
133
+ │ │ ├─hust.py # Subclass dataset(Campus scene, unrelated to paper)
134
+ │ │ ├─convert.py
135
+ │ │ ├─DemoDataset.py # demo dataset(image, video and stream)
136
+ │ ├─models
137
+ │ │ ├─YOLOP.py # Setup and Configuration of model
138
+ │ │ ├─light.py # Model lightweight(unrelated to paper, zwt)
139
+ │ │ ├─commom.py # calculation module
140
+ │ ├─utils
141
+ │ │ ├─augmentations.py # data augumentation
142
+ │ │ ├─autoanchor.py # auto anchor(k-means)
143
+ │ │ ├─split_dataset.py # (Campus scene, unrelated to paper)
144
+ │ │ ├─utils.py # logging、device_select、time_measure、optimizer_select、model_save&initialize 、Distributed training
145
+ │ ├─run
146
+ │ │ ├─dataset/training time # Visualization, logging and model_save
147
+ ├─tools
148
+ │ │ ├─demo.py # demo(folder、camera)
149
+ │ │ ├─test.py
150
+ │ │ ├─train.py
151
+ ├─toolkits
152
+ │ │ ├─deploy # Deployment of model
153
+ │ │ ├─datapre # Generation of gt(mask) for drivable area segmentation task
154
+ ├─weights # Pretraining model
155
+ ```
156
+
157
+ ---
158
+
159
+ ### Requirement
160
+
161
+ 整个代码库是在 Python 3.7、PyTorch 1.7+ 和 torchvision 0.8+ 上开发的:
162
+
163
+ ```
164
+ conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.2 -c pytorch
165
+ ```
166
+
167
+ 其他依赖库的版本要求详见`requirements.txt`:
168
+
169
+ ```setup
170
+ pip install -r requirements.txt
171
+ ```
172
+
173
+ ### Data preparation
174
+
175
+ #### Download
176
+
177
+ - 从 [images](https://bdd-data.berkeley.edu/)下载图片数据集
178
+
179
+ - 从 [det_annotations](https://drive.google.com/file/d/1Ge-R8NTxG1eqd4zbryFo-1Uonuh0Nxyl/view?usp=sharing)下载检测任务的标签
180
+ - 从 [da_seg_annotations](https://drive.google.com/file/d/1xy_DhUZRHR8yrZG3OwTQAHhYTnXn7URv/view?usp=sharing)下载可行驶区域分割任务的标签
181
+ - 从 [ll_seg_annotations](https://drive.google.com/file/d/1lDNTPIQj_YLNZVkksKM25CvCHuquJ8AP/view?usp=sharing)下载车道线分割任务的标签
182
+
183
+ 我们推荐按照如下图片数据集文件结构:
184
+
185
+ ```
186
+ ├─dataset root
187
+ │ ├─images
188
+ │ │ ├─train
189
+ │ │ ├─val
190
+ │ ├─det_annotations
191
+ │ │ ├─train
192
+ │ │ ├─val
193
+ │ ├─da_seg_annotations
194
+ │ │ ├─train
195
+ │ │ ├─val
196
+ │ ├─ll_seg_annotations
197
+ │ │ ├─train
198
+ │ │ ├─val
199
+ ```
200
+
201
+ 在 `./lib/config/default.py`下更新数据集的路径配置。
202
+
203
+ ### 模型训练
204
+
205
+ 你可以在 `./lib/config/default.py`设定训练配置. (包括: 预训练模型的读取,损失函数, 数据增强,optimizer,训练预热和余弦退火,自动anchor,训练轮次epoch, batch_size)
206
+
207
+
208
+
209
+ 如果你想尝试交替优化或者单一任务学习,可以在 `./lib/config/default.py` 中将对应的配置选项修改为 `True`。(如下,所有配置默认均为 `False`,即端到端地联合训练多个任务)。
210
+
211
+ ```python
212
+ # Alternating optimization
213
+ _C.TRAIN.SEG_ONLY = False # Only train two segmentation branchs
214
+ _C.TRAIN.DET_ONLY = False # Only train detection branch
215
+ _C.TRAIN.ENC_SEG_ONLY = False # Only train encoder and two segmentation branchs
216
+ _C.TRAIN.ENC_DET_ONLY = False # Only train encoder and detection branch
217
+
218
+ # Single task
219
+ _C.TRAIN.DRIVABLE_ONLY = False # Only train da_segmentation task
220
+ _C.TRAIN.LANE_ONLY = False # Only train ll_segmentation task
221
+ _C.TRAIN.DET_ONLY = False # Only train detection task
222
+ ```
223
+
224
+ 开始训练:
225
+
226
+ ```shell
227
+ python tools/train.py
228
+ ```
229
+ 多GPU训练:
230
+ ```
231
+ python -m torch.distributed.launch --nproc_per_node=N tools/train.py # N: the number of GPUs
232
+ ```
233
+
234
+ ### 模型评测
235
+
236
+ 你可以在 `./lib/config/default.py`设定测试配置(包括: batch_size 以及 nms的阈值).
237
+
238
+ 开始评测:
239
+
240
+ ```shell
241
+ python tools/test.py --weights weights/End-to-end.pth
242
+ ```
243
+
244
+
245
+
246
+ ### Demo测试
247
+
248
+ 我们提供两种测试方案
249
+
250
+ #### 文件夹
251
+
252
+ 测试所使用的的图片存储在 `--source`下, 然后测试结果会保存在 `--save-dir`下:
253
+
254
+ ```shell
255
+ python tools/demo.py --source inference/images
256
+ ```
257
+
258
+
259
+
260
+ #### 相机实时
261
+
262
+ 如果你的计算机连接了摄像头, 你可以将 `source` 设为摄像头的序号(默认值为 0).
263
+
264
+ ```shell
265
+ python tools/demo.py --source 0
266
+ ```
267
+
268
+
269
+
270
+ #### 展示
271
+
272
+ <table>
273
+ <tr>
274
+ <th>input</th>
275
+ <th>output</th>
276
+ </tr>
277
+ <tr>
278
+ <td><img src=pictures/input1.gif /></td>
279
+ <td><img src=pictures/output1.gif/></td>
280
+ </tr>
281
+ <tr>
282
+ <td><img src=pictures/input2.gif /></td>
283
+ <td><img src=pictures/output2.gif/></td>
284
+ </tr>
285
+ </table>
286
+
287
+
288
+
289
+ ### 部署
290
+
291
+ 我们的模型可以在 `Jetson Tx2`上 连接`Zed Camera` 实时推理。我们使用 `TensorRT` 工具进行推理加速。我们在 `./toolkits/deploy`提供模型部署和推理的全部代码。
292
+
293
+
294
+
295
+ ### 分割标签生成
296
+
297
+ 你可以通过运行以下命令生成可行驶区域的Mask标签
298
+
299
+ ```shell
300
+ python toolkits/datasetpre/gen_bdd_seglabel.py
301
+ ```
302
+
303
+
304
+
305
+ ## 引用
306
+
307
+ 如果你发现我们的代码和论文对你的研究有帮助, 可以考虑给我们 star :star: 和引用 :pencil: :
308
+
309
+ ```BibTeX
310
+ @article{wu2022yolop,
311
+ title={Yolop: You only look once for panoptic driving perception},
312
+ author={Wu, Dong and Liao, Man-Wen and Zhang, Wei-Tian and Wang, Xing-Gang and Bai, Xiang and Cheng, Wen-Qing and Liu, Wen-Yu},
313
+ journal={Machine Intelligence Research},
314
+ pages={1--13},
315
+ year={2022},
316
+ publisher={Springer}
317
+ }
318
+ ```
319
+
README.md ADDED
@@ -0,0 +1,347 @@
1
+ <div align="left">
2
+
3
+ ## You Only :eyes: Once for Panoptic ​ :car: Perception
4
+ > [**You Only Look Once for Panoptic Driving Perception**](https://link.springer.com/article/10.1007/s11633-022-1339-y)
5
+ >
6
+ > by Dong Wu, Manwen Liao, Weitian Zhang, [Xinggang Wang](https://xwcv.github.io/)<sup> :email:</sup>, [Xiang Bai](https://scholar.google.com/citations?user=UeltiQ4AAAAJ&hl=zh-CN), [Wenqing Cheng](http://eic.hust.edu.cn/professor/chengwenqing/), [Wenyu Liu](http://eic.hust.edu.cn/professor/liuwenyu/) [*School of EIC, HUST*](http://eic.hust.edu.cn/English/Home.htm)
7
+ >
8
+ > (<sup>:email:</sup>) corresponding author.
9
+ >
10
+ > *Machine Intelligence Research 2022 ([paper](https://link.springer.com/article/10.1007/s11633-022-1339-y))*
11
+
12
+ ---
13
+
14
+ [中文文档](https://github.com/hustvl/YOLOP/blob/main/README%20_CH.md)
15
+
16
+ ### The Illustration of YOLOP
17
+
18
+ ![yolop](pictures/yolop.png)
19
+
20
+ ### Contributions
21
+
22
+ * We put forward an efficient multi-task network that jointly handles three crucial tasks in autonomous driving: object detection, drivable area segmentation and lane detection. It saves computational cost, reduces inference time and improves the performance of each task. Our work is the first to reach real time on embedded devices while maintaining state-of-the-art performance on the `BDD100K` dataset.
23
+
24
+ * We design ablation experiments to verify the effectiveness of our multi-task scheme, showing that the three tasks can be learned jointly without tedious alternating optimization.
25
+
26
+ * We design ablation experiments to show that the grid-based prediction mechanism of the detection task is more closely related to that of the semantic segmentation task, which we believe can serve as a reference for other multi-task learning research.
27
+
28
+ ### Results
29
+
30
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/yolop-you-only-look-once-for-panoptic-driving/traffic-object-detection-on-bdd100k)](https://paperswithcode.com/sota/traffic-object-detection-on-bdd100k?p=yolop-you-only-look-once-for-panoptic-driving)
31
+ #### Traffic Object Detection Result
32
+
33
+ | Model | Recall(%) | mAP50(%) | Speed(fps) |
34
+ | -------------- | --------- | -------- | ---------- |
35
+ | `Multinet` | 81.3 | 60.2 | 8.6 |
36
+ | `DLT-Net` | 89.4 | 68.4 | 9.3 |
37
+ | `Faster R-CNN` | 81.2 | 64.9 | 8.8 |
38
+ | `YOLOv5s` | 86.8 | 77.2 | 82 |
39
+ | `YOLOP(ours)` | 89.2 | 76.5 | 41 |
40
+ #### Drivable Area Segmentation Result
41
+
42
+ | Model | mIOU(%) | Speed(fps) |
43
+ | ------------- | ------- | ---------- |
44
+ | `Multinet` | 71.6 | 8.6 |
45
+ | `DLT-Net` | 71.3 | 9.3 |
46
+ | `PSPNet` | 89.6 | 11.1 |
47
+ | `YOLOP(ours)` | 91.5 | 41 |
48
+
49
+ #### Lane Detection Result:
50
+
51
+ | Model | mIOU(%) | IOU(%) |
52
+ | ------------- | ------- | ------ |
53
+ | `ENet` | 34.12 | 14.64 |
54
+ | `SCNN` | 35.79 | 15.84 |
55
+ | `ENet-SAD` | 36.56 | 16.02 |
56
+ | `YOLOP(ours)` | 70.50 | 26.20 |
57
+
58
+ #### Ablation Studies 1: End-to-end v.s. Step-by-step:
59
+
60
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) |
61
+ | --------------- | --------- | ----- | ------- | ----------- | ------ |
62
+ | `ES-W` | 87.0 | 75.3 | 90.4 | 66.8 | 26.2 |
63
+ | `ED-W` | 87.3 | 76.0 | 91.6 | 71.2 | 26.1 |
64
+ | `ES-D-W` | 87.0 | 75.1 | 91.7 | 68.6 | 27.0 |
65
+ | `ED-S-W` | 87.5 | 76.1 | 91.6 | 68.0 | 26.8 |
66
+ | `End-to-end` | 89.2 | 76.5 | 91.5 | 70.5 | 26.2 |
67
+
68
+ #### Ablation Studies 2: Multi-task v.s. Single task:
69
+
70
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) | Speed(ms/frame) |
71
+ | --------------- | --------- | ----- | ------- | ----------- | ------ | --------------- |
72
+ | `Det(only)` | 88.2 | 76.9 | - | - | - | 15.7 |
73
+ | `Da-Seg(only)` | - | - | 92.0 | - | - | 14.8 |
74
+ | `Ll-Seg(only)` | - | - | - | 79.6 | 27.9 | 14.8 |
75
+ | `Multitask` | 89.2 | 76.5 | 91.5 | 70.5 | 26.2 | 24.4 |
76
+
77
+ #### Ablation Studies 3: Grid-based v.s. Region-based:
78
+
79
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) | Speed(ms/frame) |
80
+ | --------------- | --------- | ----- | ------- | ----------- | ------ | --------------- |
81
+ | `R-CNNP Det(only)` | 79.0 | 67.3 | - | - | - | - |
82
+ | `R-CNNP Seg(only)` | - | - | 90.2 | 59.5 | 24.0 | - |
83
+ | `R-CNNP Multitask` | 77.2(-1.8)| 62.6(-4.7)| 86.8(-3.4)| 49.8(-9.7)| 21.5(-2.5)| 103.3 |
84
+ | `YOLOP Det(only)` | 88.2 | 76.9 | - | - | - | - |
85
+ | `YOLOP Seg(only)` | - | - | 91.6 | 69.9 | 26.5 | - |
86
+ | `YOLOP Multitask` | 89.2(+1.0)| 76.5(-0.4)| 91.5(-0.1)| 70.5(+0.6)| 26.2(-0.3)| 24.4 |
87
+
88
+
89
+ **Notes**:
90
+
91
+ - The works we referenced include `Multinet` ([paper](https://arxiv.org/pdf/1612.07695.pdf?utm_campaign=affiliate-ir-Optimise%20media%28%20South%20East%20Asia%29%20Pte.%20ltd._156_-99_national_R_all_ACQ_cpa_en&utm_content=&utm_source=%20388939),[code](https://github.com/MarvinTeichmann/MultiNet)),`DLT-Net` ([paper](https://ieeexplore.ieee.org/abstract/document/8937825)),`Faster R-CNN` ([paper](https://proceedings.neurips.cc/paper/2015/file/14bfa6bb14875e45bba028a21ed38046-Paper.pdf),[code](https://github.com/ShaoqingRen/faster_rcnn)),`YOLOv5s`([code](https://github.com/ultralytics/yolov5)),`PSPNet`([paper](https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf),[code](https://github.com/hszhao/PSPNet)),`ENet`([paper](https://arxiv.org/pdf/1606.02147.pdf),[code](https://github.com/osmr/imgclsmob)),`SCNN`([paper](https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/download/16802/16322),[code](https://github.com/XingangPan/SCNN)) and `SAD-ENet`([paper](https://openaccess.thecvf.com/content_ICCV_2019/papers/Hou_Learning_Lightweight_Lane_Detection_CNNs_by_Self_Attention_Distillation_ICCV_2019_paper.pdf),[code](https://github.com/cardwing/Codes-for-Lane-Detection)). Thanks for their wonderful work.
92
+ - In Table 4, E, D, S and W refer to the Encoder, the Detect head, the two Segment heads and the whole network, respectively. So the algorithm (first, we only train the Encoder and Detect head; then we freeze the Encoder and Detect head and train the two Segmentation heads; finally, the entire network is trained jointly on all three tasks) can be marked as `ED-S-W`, and similarly for the others.
93
+
94
+ ---
95
+
96
+ ### Visualization
97
+
98
+ #### Traffic Object Detection Result
99
+
100
+ ![detect result](pictures/detect.png)
101
+
102
+ #### Drivable Area Segmentation Result
103
+
104
+ ![](pictures/da.png)
105
+
106
+ #### Lane Detection Result
107
+
108
+ ![](pictures/ll.png)
109
+
110
+ **Notes**:
111
+
112
+ - The visualization of the lane detection result has been post-processed by quadratic fitting (a minimal sketch of such a fit is shown below).
113
+
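A minimal sketch of what such a quadratic fit could look like, assuming a binary lane mask as produced by the lane-line branch; the actual post-processing lives in `lib/core/postprocess.py` and may differ in detail:

```python
import numpy as np

def fit_lane_quadratic(lane_mask: np.ndarray):
    """Fit x = a*y^2 + b*y + c through the foreground pixels of a binary lane mask."""
    ys, xs = np.nonzero(lane_mask)        # row/column indices of lane pixels
    if len(ys) < 3:
        return None                       # not enough points for a quadratic fit
    coeffs = np.polyfit(ys, xs, deg=2)    # least-squares quadratic fit
    y_fit = np.arange(ys.min(), ys.max())
    x_fit = np.polyval(coeffs, y_fit)     # smoothed lane curve
    return np.stack([x_fit, y_fit], axis=1)
```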
114
+ ---
115
+
116
+ ### Project Structure
117
+
118
+ ```python
119
+ ├─inference
120
+ │ ├─images # inference images
121
+ │ ├─output # inference result
122
+ ├─lib
123
+ │ ├─config/default # configuration of training and validation
124
+ │ ├─core
125
+ │ │ ├─activations.py # activation function
126
+ │ │ ├─evaluate.py # calculation of metric
127
+ │ │ ├─function.py # training and validation of model
128
+ │ │ ├─general.py #calculation of metric、nms、conversion of data-format、visualization
129
+ │ │ ├─loss.py # loss function
130
+ │ │ ├─postprocess.py # postprocess(refine da-seg and ll-seg, unrelated to paper)
131
+ │ ├─dataset
132
+ │ │ ├─AutoDriveDataset.py # Superclass dataset,general function
133
+ │ │ ├─bdd.py # Subclass dataset,specific function
134
+ │ │ ├─hust.py # Subclass dataset(Campus scene, unrelated to paper)
135
+ │ │ ├─convert.py
136
+ │ │ ├─DemoDataset.py # demo dataset(image, video and stream)
137
+ │ ├─models
138
+ │ │ ├─YOLOP.py # Setup and Configuration of model
139
+ │ │ ├─light.py # Model lightweight(unrelated to paper, zwt)
140
+ │ │ ├─commom.py # calculation module
141
+ │ ├─utils
142
+ │ │ ├─augmentations.py # data augumentation
143
+ │ │ ├─autoanchor.py # auto anchor(k-means)
144
+ │ │ ├─split_dataset.py # (Campus scene, unrelated to paper)
145
+ │ │ ├─utils.py # logging、device_select、time_measure、optimizer_select、model_save&initialize 、Distributed training
146
+ │ ├─run
147
+ │ │ ├─dataset/training time # Visualization, logging and model_save
148
+ ├─tools
149
+ │ │ ├─demo.py # demo(folder、camera)
150
+ │ │ ├─test.py
151
+ │ │ ├─train.py
152
+ ├─toolkits
153
+ │ │ ├─deploy # Deployment of model
154
+ │ │ ├─datapre # Generation of gt(mask) for drivable area segmentation task
155
+ ├─weights # Pretraining model
156
+ ```
157
+
158
+ ---
159
+
160
+ ### Requirement
161
+
162
+ This codebase has been developed with python version 3.7, PyTorch 1.7+ and torchvision 0.8+:
163
+
164
+ ```
165
+ conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.2 -c pytorch
166
+ ```
167
+
168
+ See `requirements.txt` for additional dependencies and version requirements.
169
+
170
+ ```setup
171
+ pip install -r requirements.txt
172
+ ```
173
+
174
+ ### Data preparation
175
+
176
+ #### Download
177
+
178
+ - Download the images from [images](https://bdd-data.berkeley.edu/).
179
+
180
+ - Download the annotations of detection from [det_annotations](https://drive.google.com/file/d/1Ge-R8NTxG1eqd4zbryFo-1Uonuh0Nxyl/view?usp=sharing).
181
+ - Download the annotations of drivable area segmentation from [da_seg_annotations](https://drive.google.com/file/d/1xy_DhUZRHR8yrZG3OwTQAHhYTnXn7URv/view?usp=sharing).
182
+ - Download the annotations of lane line segmentation from [ll_seg_annotations](https://drive.google.com/file/d/1lDNTPIQj_YLNZVkksKM25CvCHuquJ8AP/view?usp=sharing).
183
+
184
+ We recommend the dataset directory structure to be the following:
185
+
186
+ ```
187
+ # Matching file names (ids) establish the correspondence between images and annotations
188
+ ├─dataset root
189
+ │ ├─images
190
+ │ │ ├─train
191
+ │ │ ├─val
192
+ │ ├─det_annotations
193
+ │ │ ├─train
194
+ │ │ ├─val
195
+ │ ├─da_seg_annotations
196
+ │ │ ├─train
197
+ │ │ ├─val
198
+ │ ├─ll_seg_annotations
199
+ │ │ ├─train
200
+ │ │ ├─val
201
+ ```
202
+
203
+ Update your dataset paths in `./lib/config/default.py`.
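For reference, the keys to edit are in the `DATASET` section of `./lib/config/default.py`; the paths below are placeholders for your own dataset root:

```python
# ./lib/config/default.py  (example paths -- point them at your own dataset root)
_C.DATASET.DATAROOT = '/data/bdd100k/images'              # images folder
_C.DATASET.LABELROOT = '/data/bdd100k/det_annotations'    # detection labels
_C.DATASET.MASKROOT = '/data/bdd100k/da_seg_annotations'  # drivable area masks
_C.DATASET.LANEROOT = '/data/bdd100k/ll_seg_annotations'  # lane line masks
```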
204
+
205
+ ### Training
206
+
207
+ You can set the training configuration in `./lib/config/default.py` (including: loading of the pretrained model, loss function, data augmentation, optimizer, warm-up and cosine annealing, auto-anchor, training epochs, batch_size).
208
+
209
+ If you want to try alternating optimization or to train the model on a single task, set the corresponding configuration in `./lib/config/default.py` to `True`. (As shown below, all configurations default to `False`, which means the three tasks are trained end to end.)
210
+
211
+ ```python
212
+ # Alternating optimization
213
+ _C.TRAIN.SEG_ONLY = False # Only train two segmentation branchs
214
+ _C.TRAIN.DET_ONLY = False # Only train detection branch
215
+ _C.TRAIN.ENC_SEG_ONLY = False # Only train encoder and two segmentation branchs
216
+ _C.TRAIN.ENC_DET_ONLY = False # Only train encoder and detection branch
217
+
218
+ # Single task
219
+ _C.TRAIN.DRIVABLE_ONLY = False # Only train da_segmentation task
220
+ _C.TRAIN.LANE_ONLY = False # Only train ll_segmentation task
221
+ _C.TRAIN.DET_ONLY = False # Only train detection task
222
+ ```
223
+
224
+ Start training:
225
+
226
+ ```shell
227
+ python tools/train.py
228
+ ```
229
+ Multi GPU mode:
230
+ ```shell
231
+ python -m torch.distributed.launch --nproc_per_node=N tools/train.py # N: the number of GPUs
232
+ ```
233
+
234
+
235
+ ### Evaluation
236
+
237
+ You can set the evaluation configuration in the `./lib/config/default.py`. (Including: batch_size and threshold value for nms).
238
+
239
+ Start evaluating:
240
+
241
+ ```shell
242
+ python tools/test.py --weights weights/End-to-end.pth
243
+ ```
244
+
245
+
246
+
247
+ ### Demo Test
248
+
249
+ We provide two testing methods.
250
+
251
+ #### Folder
252
+
253
+ Store the images or videos in the folder given by `--source`; the inference results will be saved to `--save-dir`:
254
+
255
+ ```shell
256
+ python tools/demo.py --source inference/images
257
+ ```
258
+
259
+
260
+
261
+ #### Camera
262
+
263
+ If a camera is connected to your computer, you can set `--source` to the camera index (the default is 0).
264
+
265
+ ```shell
266
+ python tools/demo.py --source 0
267
+ ```
268
+
269
+
270
+
271
+ #### Demonstration
272
+
273
+ <table>
274
+ <tr>
275
+ <th>input</th>
276
+ <th>output</th>
277
+ </tr>
278
+ <tr>
279
+ <td><img src=pictures/input1.gif /></td>
280
+ <td><img src=pictures/output1.gif/></td>
281
+ </tr>
282
+ <tr>
283
+ <td><img src=pictures/input2.gif /></td>
284
+ <td><img src=pictures/output2.gif/></td>
285
+ </tr>
286
+ </table>
287
+
288
+
289
+
290
+ ### Deployment
291
+
292
+ Our model runs inference in real time on a `Jetson TX2`, with a `Zed Camera` capturing the images. We use `TensorRT` for acceleration. The full deployment and inference code is provided in `./toolkits/deploy`.
293
+
294
+
295
+
296
+ ### Segmentation Label(Mask) Generation
297
+
298
+ You can generate the labels (masks) for the drivable area segmentation task by running:
299
+
300
+ ```shell
301
+ python toolkits/datasetpre/gen_bdd_seglabel.py
302
+ ```
303
+
304
+
305
+
306
+ #### Model Transfer
307
+
308
+ Before running inference with the TensorRT C++ API, you need to convert the `.pth` file into a binary file that can be read by C++.
309
+
310
+ ```shell
311
+ python toolkits/deploy/gen_wts.py
312
+ ```
313
+
314
+ After running the above command, you obtain a binary file named `yolop.wts`.
315
+
316
+
317
+
318
+ #### Running Inference
319
+
320
+ TensorRT needs an engine file for inference. Building an engine is time-consuming, so it is convenient to save the engine file and reuse it every time you run inference. This process is integrated in `main.cpp`, which decides whether to build a new engine based on whether an engine file already exists.
321
+
322
+
323
+
324
+ ### Third-Party Resources
325
+ * YOLOP OpenCV-DNN C++ Demo: [YOLOP-opencv-dnn](https://github.com/hpc203/YOLOP-opencv-dnn) from [hpc203](https://github.com/hpc203)
326
+ * YOLOP ONNXRuntime C++ Demo: [lite.ai.toolkit](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ort/cv/yolop.cpp) from [DefTruth](https://github.com/DefTruth)
327
+ * YOLOP NCNN C++ Demo: [YOLOP-NCNN](https://github.com/EdVince/YOLOP-NCNN) from [EdVince](https://github.com/EdVince)
328
+ * YOLOP MNN C++ Demo: [YOLOP-MNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/mnn/cv/mnn_yolop.cpp) from [DefTruth](https://github.com/DefTruth)
329
+ * YOLOP TNN C++ Demo: [YOLOP-TNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/tnn/cv/tnn_yolop.cpp) from [DefTruth](https://github.com/DefTruth)
330
+
331
+
332
+
333
+ ## Citation
334
+
335
+ If you find our paper and code useful for your research, please consider giving a star :star: and citation :pencil: :
336
+
337
+ ```BibTeX
338
+ @article{wu2022yolop,
339
+ title={Yolop: You only look once for panoptic driving perception},
340
+ author={Wu, Dong and Liao, Man-Wen and Zhang, Wei-Tian and Wang, Xing-Gang and Bai, Xiang and Cheng, Wen-Qing and Liu, Wen-Yu},
341
+ journal={Machine Intelligence Research},
342
+ pages={1--13},
343
+ year={2022},
344
+ publisher={Springer}
345
+ }
346
+ ```
347
+
export_onnx.py ADDED
@@ -0,0 +1,200 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from lib.models.common import Conv, SPP, Bottleneck, BottleneckCSP, Focus, Concat, Detect, SharpenConv
4
+ from torch.nn import Upsample
5
+ from lib.utils import check_anchor_order
6
+ from lib.utils import initialize_weights
7
+ import argparse
8
+ import onnx
9
+ import onnxruntime as ort
10
+ import onnxsim
11
+
12
+ import math
13
+ import cv2
14
+
15
+ # The lane line and drivable area segmentation branches do not share information with each other and are not linked
16
+ YOLOP = [
17
+ [24, 33, 42], # Det_out_idx, Da_Segout_idx, LL_Segout_idx
18
+ [-1, Focus, [3, 32, 3]], # 0
19
+ [-1, Conv, [32, 64, 3, 2]], # 1
20
+ [-1, BottleneckCSP, [64, 64, 1]], # 2
21
+ [-1, Conv, [64, 128, 3, 2]], # 3
22
+ [-1, BottleneckCSP, [128, 128, 3]], # 4
23
+ [-1, Conv, [128, 256, 3, 2]], # 5
24
+ [-1, BottleneckCSP, [256, 256, 3]], # 6
25
+ [-1, Conv, [256, 512, 3, 2]], # 7
26
+ [-1, SPP, [512, 512, [5, 9, 13]]], # 8 SPP
27
+ [-1, BottleneckCSP, [512, 512, 1, False]], # 9
28
+ [-1, Conv, [512, 256, 1, 1]], # 10
29
+ [-1, Upsample, [None, 2, 'nearest']], # 11
30
+ [[-1, 6], Concat, [1]], # 12
31
+ [-1, BottleneckCSP, [512, 256, 1, False]], # 13
32
+ [-1, Conv, [256, 128, 1, 1]], # 14
33
+ [-1, Upsample, [None, 2, 'nearest']], # 15
34
+ [[-1, 4], Concat, [1]], # 16 #Encoder
35
+
36
+ [-1, BottleneckCSP, [256, 128, 1, False]], # 17
37
+ [-1, Conv, [128, 128, 3, 2]], # 18
38
+ [[-1, 14], Concat, [1]], # 19
39
+ [-1, BottleneckCSP, [256, 256, 1, False]], # 20
40
+ [-1, Conv, [256, 256, 3, 2]], # 21
41
+ [[-1, 10], Concat, [1]], # 22
42
+ [-1, BottleneckCSP, [512, 512, 1, False]], # 23
43
+ [[17, 20, 23], Detect,
44
+ [1, [[3, 9, 5, 11, 4, 20], [7, 18, 6, 39, 12, 31], [19, 50, 38, 81, 68, 157]], [128, 256, 512]]],
45
+ # Detection head 24: from_(features from specific layers), block, nc(num_classes) anchors ch(channels)
46
+
47
+ [16, Conv, [256, 128, 3, 1]], # 25
48
+ [-1, Upsample, [None, 2, 'nearest']], # 26
49
+ [-1, BottleneckCSP, [128, 64, 1, False]], # 27
50
+ [-1, Conv, [64, 32, 3, 1]], # 28
51
+ [-1, Upsample, [None, 2, 'nearest']], # 29
52
+ [-1, Conv, [32, 16, 3, 1]], # 30
53
+ [-1, BottleneckCSP, [16, 8, 1, False]], # 31
54
+ [-1, Upsample, [None, 2, 'nearest']], # 32
55
+ [-1, Conv, [8, 2, 3, 1]], # 33 Driving area segmentation head
56
+
57
+ [16, Conv, [256, 128, 3, 1]], # 34
58
+ [-1, Upsample, [None, 2, 'nearest']], # 35
59
+ [-1, BottleneckCSP, [128, 64, 1, False]], # 36
60
+ [-1, Conv, [64, 32, 3, 1]], # 37
61
+ [-1, Upsample, [None, 2, 'nearest']], # 38
62
+ [-1, Conv, [32, 16, 3, 1]], # 39
63
+ [-1, BottleneckCSP, [16, 8, 1, False]], # 40
64
+ [-1, Upsample, [None, 2, 'nearest']], # 41
65
+ [-1, Conv, [8, 2, 3, 1]] # 42 Lane line segmentation head
66
+ ]
67
+
68
+
69
+ class MCnet(nn.Module):
70
+ def __init__(self, block_cfg):
71
+ super(MCnet, self).__init__()
72
+ layers, save = [], []
73
+ self.nc = 1 # traffic or not
74
+ self.detector_index = -1
75
+ self.det_out_idx = block_cfg[0][0]
76
+ self.seg_out_idx = block_cfg[0][1:]
77
+ self.num_anchors = 3
78
+ self.num_outchannel = 5 + self.nc # dx,dy,dw,dh,obj_conf+cls_conf
79
+ # Build model
80
+ for i, (from_, block, args) in enumerate(block_cfg[1:]):
81
+ block = eval(block) if isinstance(block, str) else block # eval strings
82
+ if block is Detect:
83
+ self.detector_index = i
84
+ block_ = block(*args)
85
+ block_.index, block_.from_ = i, from_
86
+ layers.append(block_)
87
+ save.extend(x % i for x in ([from_] if isinstance(from_, int) else from_) if x != -1) # append to savelist
88
+ assert self.detector_index == block_cfg[0][0]
89
+
90
+ self.model, self.save = nn.Sequential(*layers), sorted(save)
91
+ self.names = [str(i) for i in range(self.nc)]
92
+
93
+ # set stride、anchor for detector
94
+ Detector = self.model[self.detector_index] # detector
95
+ if isinstance(Detector, Detect):
96
+ s = 128 # 2x min stride
97
+ # for x in self.forward(torch.zeros(1, 3, s, s)):
98
+ # print (x.shape)
99
+ with torch.no_grad():
100
+ model_out = self.forward(torch.zeros(1, 3, s, s))
101
+ detects, _, _ = model_out
102
+ Detector.stride = torch.tensor([s / x.shape[-2] for x in detects]) # forward
103
+ # print("stride"+str(Detector.stride ))
104
+ Detector.anchors /= Detector.stride.view(-1, 1, 1) # Set the anchors for the corresponding scale
105
+ check_anchor_order(Detector)
106
+ self.stride = Detector.stride
107
+ # self._initialize_biases()
108
+ initialize_weights(self)
109
+
110
+ def forward(self, x):
111
+ cache = []
112
+ out = []
113
+ det_out = None
114
+ for i, block in enumerate(self.model):
115
+ if block.from_ != -1:
116
+ x = cache[block.from_] if isinstance(block.from_, int) \
117
+ else [x if j == -1 else cache[j] for j in
118
+ block.from_] # calculate concat detect
119
+ x = block(x)
120
+ if i in self.seg_out_idx: # save driving area segment result
121
+ # m = nn.Sigmoid()
122
+ # out.append(m(x))
123
+ out.append(torch.sigmoid(x))
124
+ if i == self.detector_index:
125
+ # det_out = x
126
+ if self.training:
127
+ det_out = x
128
+ else:
129
+ det_out = x[0] # (torch.cat(z, 1), input_feat) if test
130
+ cache.append(x if block.index in self.save else None)
131
+ return det_out, out[0], out[1] # det, da, ll
132
+ # (1,na*ny*nx*nl,no=2+2+1+nc=xy+wh+obj_conf+cls_prob), (1,2,h,w) (1,2,h,w)
133
+
134
+ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
135
+ # https://arxiv.org/abs/1708.02002 section 3.3
136
+ # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
137
+ # m = self.model[-1] # Detect() module
138
+ m = self.model[self.detector_index] # Detect() module
139
+ for mi, s in zip(m.m, m.stride): # from
140
+ b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
141
+ b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
142
+ b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
143
+ mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
144
+
145
+
146
+ if __name__ == "__main__":
147
+ parser = argparse.ArgumentParser()
148
+ parser.add_argument('--height', type=int, default=640) # height
149
+ parser.add_argument('--width', type=int, default=640) # width
150
+ args = parser.parse_args()
151
+
152
+ do_simplify = True
153
+
154
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
155
+ model = MCnet(YOLOP)
156
+ checkpoint = torch.load('./weights/End-to-end.pth', map_location=device)
157
+ model.load_state_dict(checkpoint['state_dict'])
158
+ model.eval()
159
+
160
+ height = args.height
161
+ width = args.width
162
+ print("Load ./weights/End-to-end.pth done!")
163
+ onnx_path = f'./weights/yolop-{height}-{width}.onnx'
164
+ inputs = torch.randn(1, 3, height, width)
165
+
166
+ print(f"Converting to {onnx_path}")
167
+ torch.onnx.export(model, inputs, onnx_path,
168
+ verbose=False, opset_version=12, input_names=['images'],
169
+ output_names=['det_out', 'drive_area_seg', 'lane_line_seg'])
170
+ print('convert', onnx_path, 'to onnx finished!')
171
+ # Checks
172
+ model_onnx = onnx.load(onnx_path) # load onnx model
173
+ onnx.checker.check_model(model_onnx) # check onnx model
174
+ print(onnx.helper.printable_graph(model_onnx.graph)) # print
175
+
176
+ if do_simplify:
177
+ print(f'simplifying with onnx-simplifier {onnxsim.__version__}...')
178
+ model_onnx, check = onnxsim.simplify(model_onnx, check_n=3)
179
+ assert check, 'assert check failed'
180
+ onnx.save(model_onnx, onnx_path)
181
+
182
+ x = inputs.cpu().numpy()
183
+ try:
184
+ sess = ort.InferenceSession(onnx_path)
185
+
186
+ for ii in sess.get_inputs():
187
+ print("Input: ", ii)
188
+ for oo in sess.get_outputs():
189
+ print("Output: ", oo)
190
+
191
+ print('read onnx using onnxruntime success')
192
+ except Exception as e:
193
+ print('read failed')
194
+ raise e
195
+
196
+ """
197
+ PYTHONPATH=. python3 ./export_onnx.py --height 640 --width 640
198
+ PYTHONPATH=. python3 ./export_onnx.py --height 1280 --width 1280
199
+ PYTHONPATH=. python3 ./export_onnx.py --height 320 --width 320
200
+ """
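A minimal sketch of running the exported model with `onnxruntime`, assuming the 640x640 export above; the image path is hypothetical, and the preprocessing shown (resize plus scaling to [0, 1]) is an assumption that may not exactly match the normalization used during training:

```python
import cv2
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('./weights/yolop-640-640.onnx')

img = cv2.imread('inference/images/example.jpg')   # hypothetical test image
img = cv2.resize(img, (640, 640))
blob = img[:, :, ::-1].transpose(2, 0, 1).astype(np.float32) / 255.0  # BGR->RGB, HWC->CHW, [0,1]
blob = blob[None]                                   # add batch dimension -> (1, 3, 640, 640)

det_out, da_seg, ll_seg = sess.run(
    ['det_out', 'drive_area_seg', 'lane_line_seg'], # output names set in export_onnx.py
    {'images': blob})                               # input name set in export_onnx.py
print(det_out.shape, da_seg.shape, ll_seg.shape)
```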
hubconf.py ADDED
@@ -0,0 +1,30 @@
1
+ # YOLOP by hustvl, MIT License
2
+ dependencies = ['torch']
3
+ import torch
4
+ from lib.utils.utils import select_device
5
+ from lib.config import cfg
6
+ from lib.models import get_net
7
+ from pathlib import Path
8
+ import os
9
+
10
+ def yolop(pretrained=True, device="cpu"):
11
+ """Creates YOLOP model
12
+ Arguments:
13
+ pretrained (bool): load pretrained weights into the model
14
+ weights (str): the url of the pretrained weights
15
+ device (str): cuda device i.e. 0 or 0,1,2,3 or cpu
16
+ Returns:
17
+ YOLOP pytorch model
18
+ """
19
+ device = select_device(device = device)
20
+ model = get_net(cfg)
21
+ if pretrained:
22
+ path = os.path.join(Path(__file__).resolve().parent, "weights/End-to-end.pth")
23
+ checkpoint = torch.load(path, map_location= device)
24
+ model.load_state_dict(checkpoint['state_dict'])
25
+ model = model.to(device)
26
+ return model
27
+
28
+
29
+
30
+
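With this `hubconf.py`, the model can also be loaded through `torch.hub`. A minimal sketch, assuming the repository is published as `hustvl/yolop` on GitHub and the `weights/End-to-end.pth` checkpoint is available:

```python
import torch

# Fetch the repo and build the pretrained model via the yolop() entry point above.
model = torch.hub.load('hustvl/yolop', 'yolop', pretrained=True, device='cpu')
model.eval()

img = torch.randn(1, 3, 640, 640)                 # dummy RGB input
with torch.no_grad():
    det_out, da_seg_out, ll_seg_out = model(img)  # detection, drivable-area seg, lane-line seg
```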
lib/__init__.py ADDED
File without changes
lib/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (126 Bytes). View file
 
lib/config/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .default import _C as cfg
2
+ from .default import update_config
lib/config/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (208 Bytes). View file
 
lib/config/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (200 Bytes). View file
 
lib/config/__pycache__/default.cpython-310.pyc ADDED
Binary file (2.83 kB). View file
 
lib/config/__pycache__/default.cpython-37.pyc ADDED
Binary file (2.69 kB). View file
 
lib/config/default.py ADDED
@@ -0,0 +1,165 @@
1
+ import os
2
+ import platform
3
+ from yacs.config import CfgNode as CN
4
+
5
+
6
+ _C = CN()
7
+
8
+ _C.LOG_DIR = 'runs/'
9
+ _C.GPUS = (0,)  # use only the first GPU
10
+ # Auto-detect optimal worker count based on OS and CPU cores
11
+ # Windows: use 0 to avoid multiprocessing issues (most stable)
12
+ # Linux: use 8 workers for better performance (8-16 recommended on an RTX 4090D)
13
+ _C.WORKERS = 0 if platform.system() == 'Windows' else 8
14
+ _C.PIN_MEMORY = False # set to True to speed up host-to-GPU data transfer (e.g. on an RTX 4090D)
15
+ _C.PRINT_FREQ = 20
16
+ _C.AUTO_RESUME = False # Resume from the last training interrupt
17
+ _C.NEED_AUTOANCHOR = False # Re-select the prior anchors (k-means). When training from scratch (epoch=0), set it to true!
18
+ _C.DEBUG = False
19
+ _C.num_seg_class = 2
20
+
21
+ # Cudnn related params
22
+ _C.CUDNN = CN()
23
+ _C.CUDNN.BENCHMARK = True
24
+ _C.CUDNN.DETERMINISTIC = False
25
+ _C.CUDNN.ENABLED = True
26
+
27
+
28
+ # common params for NETWORK
29
+ _C.MODEL = CN(new_allowed=True)
30
+ _C.MODEL.NAME = 'yolop_yolov11_small'
31
+ _C.MODEL.STRU_WITHSHARE = False #add share_block to segbranch
32
+ _C.MODEL.HEADS_NAME = ['']
33
+ _C.MODEL.PRETRAINED = ""
34
+ _C.MODEL.PRETRAINED_DET = "" # deprecated
35
+ _C.MODEL.IMAGE_SIZE = [640, 640] # width * height, ex: 192 * 256
36
+ _C.MODEL.EXTRA = CN(new_allowed=True)
37
+
38
+
39
+ # loss params
40
+ _C.LOSS = CN(new_allowed=True)
41
+ _C.LOSS.LOSS_NAME = ''
42
+ # YOLOv11 Multi-head lambda weights: [cls, obj(unused), box, da_seg, ll_seg, ll_iou]
43
+ # Extra weighting coefficients applied on top of each GAIN; final loss = raw loss x GAIN x LAMBDA
44
+ _C.LOSS.MULTI_HEAD_LAMBDA = [1.0, 1.0, 1.0, 0.5, 0.5, 0.8] # down-weight the segmentation tasks
45
+ _C.LOSS.FL_GAMMA = 0.0 # focal loss gamma
46
+ _C.LOSS.CLS_POS_WEIGHT = 1.0 # classification loss positive weights
47
+ _C.LOSS.OBJ_POS_WEIGHT = 1.0 # object loss positive weights (YOLOv5 only)
48
+ _C.LOSS.SEG_POS_WEIGHT = 1.0 # segmentation loss positive weights
49
+ _C.LOSS.BOX_GAIN = 7.5 # box loss gain (YOLOv11 default, suited to the small-magnitude box loss)
50
+ _C.LOSS.CLS_GAIN = 0.5 # classification loss gain
51
+ _C.LOSS.OBJ_GAIN = 1.0 # object loss gain (YOLOv5 only, unused in YOLOv11)
52
+ _C.LOSS.DA_SEG_GAIN = 0.2 # driving area seg loss gain (lowered from 2.0; the seg loss is large in magnitude and needs a small weight)
53
+ _C.LOSS.LL_SEG_GAIN = 0.2 # lane line seg loss gain (lowered from 2.0)
54
+ _C.LOSS.LL_IOU_GAIN = 0.2 # lane line IoU loss gain (lowered from 2.0; the IoU loss is moderate in magnitude)
55
+
56
+
57
+ # DATASET related params
58
+ _C.DATASET = CN(new_allowed=True)
59
+ _C.DATASET.DATAROOT = 'E:\\minimake\\100k' # the path of images folder
60
+ _C.DATASET.LABELROOT = 'E:/minimake/det_annotations/zwt/bdd/bdd100k/labels/det_annotations' # the path of det_annotations folder label
61
+ _C.DATASET.MASKROOT = 'E:/minimake/da_seg_annotations' # the path of da_seg_annotations folder mask
62
+ _C.DATASET.LANEROOT = 'E:\\minimake\\ll_seg_annotations' # the path of ll_seg_annotations folder lane
63
+ _C.DATASET.DATASET = 'BddDataset'
64
+ _C.DATASET.TRAIN_SET = 'train'
65
+ _C.DATASET.TEST_SET = 'val'
66
+ _C.DATASET.DATA_FORMAT = 'jpg'
67
+ _C.DATASET.SELECT_DATA = False
68
+ _C.DATASET.ORG_IMG_SIZE = [720, 1280]
69
+
70
+ # training data augmentation
71
+ _C.DATASET.FLIP = True
72
+ _C.DATASET.SCALE_FACTOR = 0.5 # 0.25
73
+ _C.DATASET.ROT_FACTOR = 15 # 10
74
+ _C.DATASET.TRANSLATE = 0.1
75
+ _C.DATASET.SHEAR = 0.0
76
+ _C.DATASET.COLOR_RGB = False
77
+ _C.DATASET.HSV_H = 0.015 # image HSV-Hue augmentation (fraction)
78
+ _C.DATASET.HSV_S = 0.7 # image HSV-Saturation augmentation (fraction)
79
+ _C.DATASET.HSV_V = 0.4 # image HSV-Value augmentation (fraction)
80
+ # TODO: more augmet params to add
81
+
82
+
83
+ # train
84
+ _C.TRAIN = CN(new_allowed=True)
85
+ # A large batch size (e.g. 64) needs a larger learning rate, scaled proportionally: lr = base_lr * (batch_size / base_batch)
86
+ # base: lr=0.001, batch=4 → batch=64: lr = 0.001 * (64/4) = 0.016
87
+ _C.TRAIN.LR0 = 0.001 # initial learning rate (scale it up as above when using a larger batch size)
88
+ _C.TRAIN.LRF = 0.01 # final OneCycleLR learning rate (lr0 * lrf) - YOLOv11 uses 0.01
89
+ _C.TRAIN.WARMUP_EPOCHS = 5.0 # with a large batch, increase warmup to 5 epochs
90
+ _C.TRAIN.WARMUP_BIASE_LR = 0.1
91
+ _C.TRAIN.WARMUP_MOMENTUM = 0.8
92
+
93
+ _C.TRAIN.OPTIMIZER = 'adam'
94
+ _C.TRAIN.MOMENTUM = 0.937
95
+ _C.TRAIN.WD = 0.0005
96
+ _C.TRAIN.NESTEROV = True
97
+ _C.TRAIN.GAMMA1 = 0.99
98
+ _C.TRAIN.GAMMA2 = 0.0
99
+
100
+ _C.TRAIN.BEGIN_EPOCH = 0
101
+ _C.TRAIN.END_EPOCH = 200 # YOLOv11 recommends 200+ epochs
102
+
103
+ _C.TRAIN.VAL_FREQ = 1
104
+ _C.TRAIN.BATCH_SIZE_PER_GPU = 2 # an RTX 4090D (24 GB) can support batch_size=64
105
+ _C.TRAIN.SHUFFLE = True
106
+
107
+ _C.TRAIN.IOU_THRESHOLD = 0.2
108
+ _C.TRAIN.ANCHOR_THRESHOLD = 4.0
109
+
110
+ # if training 3 tasks end-to-end, set all parameters as True
111
+ # Alternating optimization
112
+ _C.TRAIN.SEG_ONLY = False # Only train two segmentation branchs
113
+ _C.TRAIN.DET_ONLY = False # Only train detection branch
114
+ _C.TRAIN.ENC_SEG_ONLY = False # Only train encoder and two segmentation branchs
115
+ _C.TRAIN.ENC_DET_ONLY = False # Only train encoder and detection branch
116
+
117
+ # Single task
118
+ _C.TRAIN.DRIVABLE_ONLY = False # Only train da_segmentation task
119
+ _C.TRAIN.LANE_ONLY = False # Only train ll_segmentation task
120
+ _C.TRAIN.DET_ONLY = False # Only train detection task
121
+
122
+
123
+
124
+
125
+ _C.TRAIN.PLOT = True #
126
+
127
+ # testing
128
+ _C.TEST = CN(new_allowed=True)
129
+ _C.TEST.BATCH_SIZE_PER_GPU = 8
130
+ _C.TEST.MODEL_FILE = ''
131
+ _C.TEST.SAVE_JSON = False
132
+ _C.TEST.SAVE_TXT = False
133
+ _C.TEST.PLOTS = True
134
+ _C.TEST.NMS_CONF_THRESHOLD = 0.001
135
+ _C.TEST.NMS_IOU_THRESHOLD = 0.6
136
+
137
+
138
+ def update_config(cfg, args):
139
+ cfg.defrost()
140
+ # cfg.merge_from_file(args.cfg)
141
+
142
+ if args.modelDir:
143
+ cfg.OUTPUT_DIR = args.modelDir
144
+
145
+ if args.logDir:
146
+ cfg.LOG_DIR = args.logDir
147
+
148
+ # if args.conf_thres:
149
+ # cfg.TEST.NMS_CONF_THRESHOLD = args.conf_thres
150
+
151
+ # if args.iou_thres:
152
+ # cfg.TEST.NMS_IOU_THRESHOLD = args.iou_thres
153
+
154
+
155
+
156
+ # cfg.MODEL.PRETRAINED = os.path.join(
157
+ # cfg.DATA_DIR, cfg.MODEL.PRETRAINED
158
+ # )
159
+ #
160
+ # if cfg.TEST.MODEL_FILE:
161
+ # cfg.TEST.MODEL_FILE = os.path.join(
162
+ # cfg.DATA_DIR, cfg.TEST.MODEL_FILE
163
+ # )
164
+
165
+ cfg.freeze()
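A minimal sketch of how this configuration is typically consumed; the argument names follow `update_config` above, while the real `tools/train.py` defines more options:

```python
import argparse
from lib.config import cfg, update_config   # exported by lib/config/__init__.py

parser = argparse.ArgumentParser()
parser.add_argument('--modelDir', type=str, default='')
parser.add_argument('--logDir', type=str, default='runs/')
args = parser.parse_args([])                 # use defaults here; a real script parses sys.argv

update_config(cfg, args)                     # applies the overrides above and freezes cfg
print(cfg.MODEL.IMAGE_SIZE, cfg.TRAIN.BATCH_SIZE_PER_GPU, cfg.LOG_DIR)
```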
lib/config/yolov11.py ADDED
@@ -0,0 +1,30 @@
1
+ """
2
+ YOLOv11 backbone configuration file
3
+ Configuration for a YOLOP model that uses YOLOv11 as the backbone
4
+ """
5
+ import os
6
+ import platform
7
+ from .default import _C as cfg
8
+
9
+ # Override the default config to use YOLOv11
10
+ cfg.MODEL.USE_YOLOV11 = True
11
+ cfg.MODEL.YOLOV11_SCALE = 's' # 'n', 's', 'm', 'l', 'x'
12
+ cfg.MODEL.YOLOV11_WEIGHTS = '' # 'weights/yolo11s.pt'
13
+ cfg.MODEL.FREEZE_BACKBONE = False # whether to freeze the backbone
14
+
15
+ # Training configuration
16
+ cfg.TRAIN.BATCH_SIZE_PER_GPU = 8 # YOLOv11s can use a larger batch size
17
+ cfg.TRAIN.END_EPOCH = 200 # YOLOv11 recommends 200+ epochs
18
+
19
+ # Learning-rate configuration (a larger learning rate can be used when the backbone is frozen)
20
+ cfg.TRAIN.LR0 = 0.01 # increase the learning rate when the backbone is frozen
21
+ cfg.TRAIN.LRF = 0.01
22
+ cfg.TRAIN.WARMUP_EPOCHS = 3.0 # shorter warmup
23
+
24
+ # Loss weights (adapted for YOLOv11)
25
+ cfg.LOSS.MULTI_HEAD_LAMBDA = [1.0, 1.0, 1.0, 0.5, 0.5, 0.8]
26
+ cfg.LOSS.BOX_GAIN = 7.5
27
+ cfg.LOSS.CLS_GAIN = 0.5
28
+ cfg.LOSS.DA_SEG_GAIN = 0.5
29
+ cfg.LOSS.LL_SEG_GAIN = 0.5
30
+ cfg.LOSS.LL_IOU_GAIN = 1.0
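Since this module mutates the shared default `cfg` in place when imported, a minimal sketch of picking up the YOLOv11 settings could look like this (assuming the training script is pointed at this config):

```python
# Importing the module applies the YOLOv11 overrides to the shared default cfg.
from lib.config.yolov11 import cfg

print(cfg.MODEL.USE_YOLOV11)         # True
print(cfg.MODEL.YOLOV11_SCALE)       # 's'
print(cfg.TRAIN.BATCH_SIZE_PER_GPU)  # 8
```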
lib/core/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .function import AverageMeter
lib/core/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (176 Bytes). View file
 
lib/core/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (168 Bytes). View file
 
lib/core/__pycache__/evaluate.cpython-310.pyc ADDED
Binary file (9.74 kB). View file
 
lib/core/__pycache__/evaluate.cpython-37.pyc ADDED
Binary file (9.76 kB). View file
 
lib/core/__pycache__/function.cpython-310.pyc ADDED
Binary file (14.8 kB). View file
 
lib/core/__pycache__/function.cpython-37.pyc ADDED
Binary file (14.7 kB). View file
 
lib/core/__pycache__/general.cpython-310.pyc ADDED
Binary file (14.2 kB). View file
 
lib/core/__pycache__/general.cpython-37.pyc ADDED
Binary file (14.4 kB). View file
 
lib/core/__pycache__/loss.cpython-310.pyc ADDED
Binary file (6.54 kB). View file
 
lib/core/__pycache__/postprocess.cpython-310.pyc ADDED
Binary file (5.73 kB). View file
 
lib/core/__pycache__/postprocess.cpython-37.pyc ADDED
Binary file (5.79 kB). View file
 
lib/core/activations.py ADDED
@@ -0,0 +1,72 @@
1
+ # Activation functions
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ # Swish https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------
9
+ class Swish(nn.Module): #
10
+ @staticmethod
11
+ def forward(x):
12
+ return x * torch.sigmoid(x)
13
+
14
+
15
+ class Hardswish(nn.Module): # export-friendly version of nn.Hardswish()
16
+ @staticmethod
17
+ def forward(x):
18
+ # return x * F.hardsigmoid(x) # for torchscript and CoreML
19
+ return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX
20
+
21
+
22
+ class MemoryEfficientSwish(nn.Module):
23
+ class F(torch.autograd.Function):
24
+ @staticmethod
25
+ def forward(ctx, x):
26
+ ctx.save_for_backward(x)
27
+ return x * torch.sigmoid(x)
28
+
29
+ @staticmethod
30
+ def backward(ctx, grad_output):
31
+ x = ctx.saved_tensors[0]
32
+ sx = torch.sigmoid(x)
33
+ return grad_output * (sx * (1 + x * (1 - sx)))
34
+
35
+ def forward(self, x):
36
+ return self.F.apply(x)
37
+
38
+
39
+ # Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
40
+ class Mish(nn.Module):
41
+ @staticmethod
42
+ def forward(x):
43
+ return x * F.softplus(x).tanh()
44
+
45
+
46
+ class MemoryEfficientMish(nn.Module):
47
+ class F(torch.autograd.Function):
48
+ @staticmethod
49
+ def forward(ctx, x):
50
+ ctx.save_for_backward(x)
51
+ return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
52
+
53
+ @staticmethod
54
+ def backward(ctx, grad_output):
55
+ x = ctx.saved_tensors[0]
56
+ sx = torch.sigmoid(x)
57
+ fx = F.softplus(x).tanh()
58
+ return grad_output * (fx + x * sx * (1 - fx * fx))
59
+
60
+ def forward(self, x):
61
+ return self.F.apply(x)
62
+
63
+
64
+ # FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
65
+ class FReLU(nn.Module):
66
+ def __init__(self, c1, k=3): # ch_in, kernel
67
+ super().__init__()
68
+ self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
69
+ self.bn = nn.BatchNorm2d(c1)
70
+
71
+ def forward(self, x):
72
+ return torch.max(x, self.bn(self.conv(x)))
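A minimal sketch of using these activations directly; the input shape is arbitrary, and `FReLU` needs the channel count of its input:

```python
import torch
from lib.core.activations import Hardswish, MemoryEfficientMish, FReLU

x = torch.randn(1, 16, 32, 32)

print(Hardswish()(x).shape)             # export-friendly Hardswish, same shape as input
print(MemoryEfficientMish()(x).shape)   # Mish with a custom, memory-efficient backward
print(FReLU(16)(x).shape)               # FReLU (funnel activation), needs c1=16 channels
```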
lib/core/evaluate.py ADDED
@@ -0,0 +1,278 @@
1
+ # Model validation metrics
2
+
3
+ from pathlib import Path
4
+
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ import torch
8
+
9
+ from . import general
10
+
11
+
12
+ def fitness(x):
13
+ # Model fitness as a weighted combination of metrics
14
+ w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
15
+ return (x[:, :4] * w).sum(1)
16
+
17
+
18
+ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]):
19
+ """ Compute the average precision, given the recall and precision curves.
20
+ Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
21
+ # Arguments
22
+ tp: True positives (nparray, nx1 or nx10).
23
+ conf: Objectness value from 0-1 (nparray).
24
+ pred_cls: Predicted object classes (nparray).
25
+ target_cls: True object classes (nparray).
26
+ plot: Plot precision-recall curve at mAP@0.5
27
+ save_dir: Plot save directory
28
+ # Returns
29
+ The average precision as computed in py-faster-rcnn.
30
+ """
31
+
32
+ # Sort by objectness
33
+ i = np.argsort(-conf) # sorted index from big to small
34
+ tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
35
+
36
+ # Find unique classes, each number just showed up once
37
+ unique_classes = np.unique(target_cls)
38
+
39
+ # Create Precision-Recall curve and compute AP for each class
40
+ px, py = np.linspace(0, 1, 1000), [] # for plotting
41
+ pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
42
+ s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
43
+ ap, p, r = np.zeros(s), np.zeros((unique_classes.shape[0], 1000)), np.zeros((unique_classes.shape[0], 1000))
44
+ for ci, c in enumerate(unique_classes):
45
+ i = pred_cls == c
46
+ n_l = (target_cls == c).sum() # number of labels
47
+ n_p = i.sum() # number of predictions
48
+
49
+ if n_p == 0 or n_l == 0:
50
+ continue
51
+ else:
52
+ # Accumulate FPs and TPs
53
+ fpc = (1 - tp[i]).cumsum(0)
54
+ tpc = tp[i].cumsum(0)
55
+
56
+ # Recall
57
+ recall = tpc / (n_l + 1e-16) # recall curve
58
+ r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # r at pr_score, negative x, xp because xp decreases
59
+
60
+ # Precision
61
+ precision = tpc / (tpc + fpc) # precision curve
62
+ p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
63
+
64
+ # AP from recall-precision curve
65
+ for j in range(tp.shape[1]):
66
+ ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
67
+ if plot and (j == 0):
68
+ py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
69
+
70
+ # Compute F1 score (harmonic mean of precision and recall)
71
+ f1 = 2 * p * r / (p + r + 1e-16)
72
+ i = r.mean(0).argmax()
73
+
74
+ if plot:
75
+ plot_pr_curve(px, py, ap, save_dir, names)
76
+
77
+ return p[:, i], r[:, i], ap, f1, unique_classes.astype('int32')
78
+
79
+
80
+ def compute_ap(recall, precision):
81
+ """ Compute the average precision, given the recall and precision curves
82
+ # Arguments
83
+ recall: The recall curve (list)
84
+ precision: The precision curve (list)
85
+ # Returns
86
+ Average precision, precision curve, recall curve
87
+ """
88
+
89
+ # Append sentinel values to beginning and end
90
+ mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01]))
91
+ mpre = np.concatenate(([1.], precision, [0.]))
92
+
93
+ # Compute the precision envelope
94
+ mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
95
+
96
+ # Integrate area under curve
97
+ method = 'interp' # methods: 'continuous', 'interp'
98
+ if method == 'interp':
99
+ x = np.linspace(0, 1, 101) # 101-point interp (COCO)
100
+ ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
101
+ else: # 'continuous'
102
+ i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
103
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
104
+
105
+ return ap, mpre, mrec
106
+
107
+
108
+ class ConfusionMatrix:
109
+ # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
110
+ def __init__(self, nc, conf=0.25, iou_thres=0.45):
111
+ self.matrix = np.zeros((nc + 1, nc + 1))
112
+ self.nc = nc # number of classes
113
+ self.conf = conf
114
+ self.iou_thres = iou_thres
115
+
116
+ def process_batch(self, detections, labels):
117
+ """
118
+ Return intersection-over-union (Jaccard index) of boxes.
119
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
120
+ Arguments:
121
+ detections (Array[N, 6]), x1, y1, x2, y2, conf, class
122
+ labels (Array[M, 5]), class, x1, y1, x2, y2
123
+ Returns:
124
+ None, updates confusion matrix accordingly
125
+ """
126
+ detections = detections[detections[:, 4] > self.conf]
127
+ gt_classes = labels[:, 0].int()
128
+ detection_classes = detections[:, 5].int()
129
+ iou = general.box_iou(labels[:, 1:], detections[:, :4])
130
+
131
+ x = torch.where(iou > self.iou_thres)
132
+ if x[0].shape[0]:
133
+ matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
134
+ if x[0].shape[0] > 1:
135
+ matches = matches[matches[:, 2].argsort()[::-1]]
136
+ matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
137
+ matches = matches[matches[:, 2].argsort()[::-1]]
138
+ matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
139
+ else:
140
+ matches = np.zeros((0, 3))
141
+
142
+ n = matches.shape[0] > 0
143
+ m0, m1, _ = matches.transpose().astype(np.int16)
144
+ for i, gc in enumerate(gt_classes):
145
+ j = m0 == i
146
+ if n and sum(j) == 1:
147
+ self.matrix[gc, detection_classes[m1[j]]] += 1 # correct
148
+ else:
149
+ self.matrix[gc, self.nc] += 1 # background FP
150
+
151
+ if n:
152
+ for i, dc in enumerate(detection_classes):
153
+ if not any(m1 == i):
154
+ self.matrix[self.nc, dc] += 1 # background FN
155
+
156
+ def matrix(self):
157
+ return self.matrix
158
+
159
+ def plot(self, save_dir='', names=()):
160
+ try:
161
+ import seaborn as sn
162
+
163
+ array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize
164
+ array[array < 0.005] = np.nan # don't annotate (would appear as 0.00)
165
+
166
+ fig = plt.figure(figsize=(12, 9), tight_layout=True)
167
+ sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size
168
+ labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels
169
+ sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True,
170
+ xticklabels=names + ['background FN'] if labels else "auto",
171
+ yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1))
172
+ fig.axes[0].set_xlabel('True')
173
+ fig.axes[0].set_ylabel('Predicted')
174
+ fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
175
+ except Exception as e:
176
+ pass
177
+
178
+ def print(self):
179
+ for i in range(self.nc + 1):
180
+ print(' '.join(map(str, self.matrix[i])))
181
+
182
+ class SegmentationMetric(object):
183
+ '''
184
+ imgLabel [batch_size, height(144), width(256)]
185
+ confusionMatrix [[0(TN),1(FP)],
186
+ [2(FN),3(TP)]]
187
+ '''
188
+ def __init__(self, numClass):
189
+ self.numClass = numClass
190
+ self.confusionMatrix = np.zeros((self.numClass,)*2)
191
+
192
+ def pixelAccuracy(self):
193
+ # return all class overall pixel accuracy
194
+ # acc = (TP + TN) / (TP + TN + FP + FN)
195
+ acc = np.diag(self.confusionMatrix).sum() / self.confusionMatrix.sum()
196
+ return acc
197
+
198
+ def lineAccuracy(self):
199
+ Acc = np.diag(self.confusionMatrix) / (self.confusionMatrix.sum(axis=1) + 1e-12)
200
+ return Acc[1]
201
+
202
+ def classPixelAccuracy(self):
203
+ # return each category's pixel accuracy (more precisely, this is per-class precision)
204
+ # acc = TP / (TP + FP)
205
+ classAcc = np.diag(self.confusionMatrix) / (self.confusionMatrix.sum(axis=0) + 1e-12)
206
+ return classAcc
207
+
208
+ def meanPixelAccuracy(self):
209
+ classAcc = self.classPixelAccuracy()
210
+ meanAcc = np.nanmean(classAcc)
211
+ return meanAcc
212
+
213
+ def meanIntersectionOverUnion(self):
214
+ # Intersection = TP Union = TP + FP + FN
215
+ # IoU = TP / (TP + FP + FN)
216
+ intersection = np.diag(self.confusionMatrix)
217
+ union = np.sum(self.confusionMatrix, axis=1) + np.sum(self.confusionMatrix, axis=0) - np.diag(self.confusionMatrix)
218
+ IoU = intersection / union
219
+ IoU[np.isnan(IoU)] = 0
220
+ mIoU = np.nanmean(IoU)
221
+ return mIoU
222
+
223
+ def IntersectionOverUnion(self):
224
+ intersection = np.diag(self.confusionMatrix)
225
+ union = np.sum(self.confusionMatrix, axis=1) + np.sum(self.confusionMatrix, axis=0) - np.diag(self.confusionMatrix)
226
+ IoU = intersection / union
227
+ IoU[np.isnan(IoU)] = 0
228
+ return IoU[1]
229
+
230
+ def genConfusionMatrix(self, imgPredict, imgLabel):
231
+ # remove classes from unlabeled pixels in gt image and predict
232
+ # print(imgLabel.shape)
233
+ mask = (imgLabel >= 0) & (imgLabel < self.numClass)
234
+ label = self.numClass * imgLabel[mask] + imgPredict[mask]
235
+ count = np.bincount(label, minlength=self.numClass**2)
236
+ confusionMatrix = count.reshape(self.numClass, self.numClass)
237
+ return confusionMatrix
238
+
239
+ def Frequency_Weighted_Intersection_over_Union(self):
240
+ # FWIoU = [(TP+FN)/(TP+FP+TN+FN)] * [TP / (TP + FP + FN)]
241
+ freq = np.sum(self.confusionMatrix, axis=1) / np.sum(self.confusionMatrix)
242
+ iu = np.diag(self.confusionMatrix) / (
243
+ np.sum(self.confusionMatrix, axis=1) + np.sum(self.confusionMatrix, axis=0) -
244
+ np.diag(self.confusionMatrix))
245
+ FWIoU = (freq[freq > 0] * iu[freq > 0]).sum()
246
+ return FWIoU
247
+
248
+
249
+ def addBatch(self, imgPredict, imgLabel):
250
+ assert imgPredict.shape == imgLabel.shape
251
+ self.confusionMatrix += self.genConfusionMatrix(imgPredict, imgLabel)
252
+
253
+ def reset(self):
254
+ self.confusionMatrix = np.zeros((self.numClass, self.numClass))
255
+
256
+
257
+
258
+
259
+
260
+ # Plots ----------------------------------------------------------------------------------------------------------------
261
+
262
+ def plot_pr_curve(px, py, ap, save_dir='.', names=()):
263
+ fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
264
+ py = np.stack(py, axis=1)
265
+
266
+ if 0 < len(names) < 21: # show mAP in legend if < 21 classes
267
+ for i, y in enumerate(py.T):
268
+ ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, precision)
269
+ else:
270
+ ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision)
271
+
272
+ ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
273
+ ax.set_xlabel('Recall')
274
+ ax.set_ylabel('Precision')
275
+ ax.set_xlim(0, 1)
276
+ ax.set_ylim(0, 1)
277
+ plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
278
+ fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250)
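
As a quick, hedged illustration of the segmentation metrics defined above, the sketch below pushes a toy 2-class prediction/label pair through SegmentationMetric; the arrays and the resulting numbers are made up purely for the example:

    import numpy as np
    from lib.core.evaluate import SegmentationMetric

    metric = SegmentationMetric(2)             # class 0 = background, class 1 = foreground
    pred = np.array([[0, 1, 1],
                     [0, 0, 1]])
    label = np.array([[0, 1, 0],
                      [0, 1, 1]])
    metric.addBatch(pred, label)               # accumulates the 2x2 confusion matrix
    print(metric.pixelAccuracy())              # (TP + TN) / total = 4/6
    print(metric.IntersectionOverUnion())      # class-1 IoU = TP / (TP + FP + FN) = 2/4
    print(metric.meanIntersectionOverUnion())  # mean IoU over both classes = 0.5
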
lib/core/function.py ADDED
@@ -0,0 +1,510 @@
1
+ import time
2
+ from lib.core.evaluate import ConfusionMatrix,SegmentationMetric
3
+ from lib.core.general import non_max_suppression,check_img_size,scale_coords,xyxy2xywh,xywh2xyxy,box_iou,coco80_to_coco91_class,plot_images,ap_per_class,output_to_target
4
+ from lib.utils.utils import time_synchronized
5
+ from lib.utils import plot_img_and_mask,plot_one_box,show_seg_result
6
+ import torch
7
+ from threading import Thread
8
+ import numpy as np
9
+ from PIL import Image
10
+ from torchvision import transforms
11
+ from pathlib import Path
12
+ import json
13
+ import random
14
+ import cv2
15
+ import os
16
+ import math
17
+ from torch.cuda import amp
18
+ from tqdm import tqdm
19
+
20
+
21
+ def train(cfg, train_loader, model, criterion, optimizer, scaler, epoch, num_batch, num_warmup,
22
+ writer_dict, logger, device, rank=-1):
23
+ """
24
+ train for one epoch
25
+
26
+ Inputs:
27
+ - config: configurations
28
+ - train_loader: loader for data
29
+ - model:
30
+ - criterion: (function) calculate all the loss, return total_loss, head_losses
31
+ - writer_dict:
32
+ outputs(3,)
33
+ output[0] len:3, [1,3,32,32,85], [1,3,16,16,85], [1,3,8,8,85]
34
+ output[1] len:1, [2,256,256]
35
+ output[2] len:1, [2,256,256]
36
+ target(3,)
37
+ target[0] [1,n,5]
38
+ target[1] [2,256,256]
39
+ target[2] [2,256,256]
40
+ Returns:
41
+ None
42
+
43
+ """
44
+ batch_time = AverageMeter()
45
+ data_time = AverageMeter()
46
+ losses = AverageMeter()
47
+
48
+ # switch to train mode
49
+ model.train()
50
+ start = time.time()
51
+ for i, (input, target, paths, shapes) in enumerate(train_loader):
52
+ intermediate = time.time()
53
+ #print('tims:{}'.format(intermediate-start))
54
+ num_iter = i + num_batch * (epoch - 1)
55
+
56
+ if num_iter < num_warmup:
57
+ # warm up
58
+ lf = lambda x: ((1 + math.cos(x * math.pi / cfg.TRAIN.END_EPOCH)) / 2) * \
59
+ (1 - cfg.TRAIN.LRF) + cfg.TRAIN.LRF # cosine
60
+ xi = [0, num_warmup]
61
+ # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou)
62
+ for j, x in enumerate(optimizer.param_groups):
63
+ # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
64
+ x['lr'] = np.interp(num_iter, xi, [cfg.TRAIN.WARMUP_BIASE_LR if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
65
+ if 'momentum' in x:
66
+ x['momentum'] = np.interp(num_iter, xi, [cfg.TRAIN.WARMUP_MOMENTUM, cfg.TRAIN.MOMENTUM])
67
+
68
+ data_time.update(time.time() - start)
69
+ if not cfg.DEBUG:
70
+ input = input.to(device, non_blocking=True)
71
+ assign_target = []
72
+ for tgt in target:
73
+ assign_target.append(tgt.to(device))
74
+ target = assign_target
75
+ with amp.autocast(enabled=device.type != 'cpu'):
76
+ outputs = model(input)
77
+ total_loss, head_losses = criterion(outputs, target, shapes,model)
78
+ # print(head_losses)
79
+
80
+ # compute gradient and do update step
81
+ optimizer.zero_grad()
82
+ scaler.scale(total_loss).backward()
83
+ scaler.step(optimizer)
84
+ scaler.update()
85
+
86
+ if rank in [-1, 0]:
87
+ # measure accuracy and record loss
88
+ losses.update(total_loss.item(), input.size(0))
89
+
90
+ # _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
91
+ # target.detach().cpu().numpy())
92
+ # acc.update(avg_acc, cnt)
93
+
94
+ # measure elapsed time
95
+ batch_time.update(time.time() - start)
96
+ end = time.time()
97
+ if i % cfg.PRINT_FREQ == 0:
98
+ msg = 'Epoch: [{0}][{1}/{2}]\t' \
99
+ 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
100
+ 'Speed {speed:.1f} samples/s\t' \
101
+ 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
102
+ 'Loss {loss.val:.5f} ({loss.avg:.5f})'.format(
103
+ epoch, i, len(train_loader), batch_time=batch_time,
104
+ speed=input.size(0)/batch_time.val,
105
+ data_time=data_time, loss=losses)
106
+ logger.info(msg)
107
+
108
+ writer = writer_dict['writer']
109
+ global_steps = writer_dict['train_global_steps']
110
+ writer.add_scalar('train_loss', losses.val, global_steps)
111
+ # writer.add_scalar('train_acc', acc.val, global_steps)
112
+ writer_dict['train_global_steps'] = global_steps + 1
113
+
114
+
115
+ def validate(epoch,config, val_loader, val_dataset, model, criterion, output_dir,
116
+ tb_log_dir, writer_dict=None, logger=None, device='cpu', rank=-1):
117
+ """
118
+ validata
119
+
120
+ Inputs:
121
+ - config: configurations
122
+ - train_loader: loder for data
123
+ - model:
124
+ - criterion: (function) calculate all the loss, return
125
+ - writer_dict:
126
+
127
+ Return:
128
+ None
129
+ """
130
+ # setting
131
+ max_stride = 32
132
+ weights = None
133
+
134
+ save_dir = output_dir + os.path.sep + 'visualization'
135
+ if not os.path.exists(save_dir):
136
+ os.mkdir(save_dir)
137
+
138
+ # print(save_dir)
139
+ _, imgsz = [check_img_size(x, s=max_stride) for x in config.MODEL.IMAGE_SIZE] #imgsz is multiple of max_stride
140
+ batch_size = config.TRAIN.BATCH_SIZE_PER_GPU * len(config.GPUS)
141
+ test_batch_size = config.TEST.BATCH_SIZE_PER_GPU * len(config.GPUS)
142
+ training = False
143
+ is_coco = False #is coco dataset
144
+ save_conf=False # save auto-label confidences
145
+ verbose=False
146
+ save_hybrid=False
147
+ log_imgs,wandb = min(16,100), None
148
+
149
+ nc = 1
150
+ iouv = torch.linspace(0.5,0.95,10).to(device) #iou vector for mAP@0.5:0.95
151
+ niou = iouv.numel()
152
+
153
+ try:
154
+ import wandb
155
+ except ImportError:
156
+ wandb = None
157
+ log_imgs = 0
158
+
159
+ seen = 0
160
+ confusion_matrix = ConfusionMatrix(nc=model.nc) #detector confusion matrix
161
+ da_metric = SegmentationMetric(config.num_seg_class) #segment confusion matrix
162
+ ll_metric = SegmentationMetric(2) #segment confusion matrix
163
+
164
+ names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
165
+ colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
166
+ coco91class = coco80_to_coco91_class()
167
+
168
+ s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
169
+ p, r, f1, mp, mr, map50, map, t_inf, t_nms = 0., 0., 0., 0., 0., 0., 0., 0., 0.
170
+
171
+ losses = AverageMeter()
172
+
173
+ da_acc_seg = AverageMeter()
174
+ da_IoU_seg = AverageMeter()
175
+ da_mIoU_seg = AverageMeter()
176
+
177
+ ll_acc_seg = AverageMeter()
178
+ ll_IoU_seg = AverageMeter()
179
+ ll_mIoU_seg = AverageMeter()
180
+
181
+ T_inf = AverageMeter()
182
+ T_nms = AverageMeter()
183
+
184
+ # switch to evaluate mode
185
+ model.eval()
186
+ jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
187
+
188
+ for batch_i, (img, target, paths, shapes) in tqdm(enumerate(val_loader), total=len(val_loader)):
189
+ if not config.DEBUG:
190
+ img = img.to(device, non_blocking=True)
191
+ assign_target = []
192
+ for tgt in target:
193
+ assign_target.append(tgt.to(device))
194
+ target = assign_target
195
+ nb, _, height, width = img.shape #batch size, channel, height, width
196
+
197
+ with torch.no_grad():
198
+ pad_w, pad_h = shapes[0][1][1]
199
+ pad_w = int(pad_w)
200
+ pad_h = int(pad_h)
201
+ ratio = shapes[0][1][0][0]
202
+
203
+ t = time_synchronized()
204
+ det_out, da_seg_out, ll_seg_out= model(img)
205
+ t_inf = time_synchronized() - t
206
+ if batch_i > 0:
207
+ T_inf.update(t_inf/img.size(0),img.size(0))
208
+
209
+ inf_out,train_out = det_out
210
+
211
+ #driving area segment evaluation
212
+ _,da_predict=torch.max(da_seg_out, 1)
213
+ _,da_gt=torch.max(target[1], 1)
214
+ da_predict = da_predict[:, pad_h:height-pad_h, pad_w:width-pad_w]
215
+ da_gt = da_gt[:, pad_h:height-pad_h, pad_w:width-pad_w]
216
+
217
+ da_metric.reset()
218
+ da_metric.addBatch(da_predict.cpu(), da_gt.cpu())
219
+ da_acc = da_metric.pixelAccuracy()
220
+ da_IoU = da_metric.IntersectionOverUnion()
221
+ da_mIoU = da_metric.meanIntersectionOverUnion()
222
+
223
+ da_acc_seg.update(da_acc,img.size(0))
224
+ da_IoU_seg.update(da_IoU,img.size(0))
225
+ da_mIoU_seg.update(da_mIoU,img.size(0))
226
+
227
+ #lane line segment evaluation
228
+ _,ll_predict=torch.max(ll_seg_out, 1)
229
+ _,ll_gt=torch.max(target[2], 1)
230
+ ll_predict = ll_predict[:, pad_h:height-pad_h, pad_w:width-pad_w]
231
+ ll_gt = ll_gt[:, pad_h:height-pad_h, pad_w:width-pad_w]
232
+
233
+ ll_metric.reset()
234
+ ll_metric.addBatch(ll_predict.cpu(), ll_gt.cpu())
235
+ ll_acc = ll_metric.lineAccuracy()
236
+ ll_IoU = ll_metric.IntersectionOverUnion()
237
+ ll_mIoU = ll_metric.meanIntersectionOverUnion()
238
+
239
+ ll_acc_seg.update(ll_acc,img.size(0))
240
+ ll_IoU_seg.update(ll_IoU,img.size(0))
241
+ ll_mIoU_seg.update(ll_mIoU,img.size(0))
242
+
243
+ total_loss, head_losses = criterion((train_out,da_seg_out, ll_seg_out), target, shapes,model) #Compute loss
244
+ losses.update(total_loss.item(), img.size(0))
245
+
246
+ #NMS
247
+ t = time_synchronized()
248
+ target[0][:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels
249
+ lb = [target[0][target[0][:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
250
+ output = non_max_suppression(inf_out, conf_thres= config.TEST.NMS_CONF_THRESHOLD, iou_thres=config.TEST.NMS_IOU_THRESHOLD, labels=lb)
251
+ #output = non_max_suppression(inf_out, conf_thres=0.001, iou_thres=0.6)
252
+ #output = non_max_suppression(inf_out, conf_thres=config.TEST.NMS_CONF_THRES, iou_thres=config.TEST.NMS_IOU_THRES)
253
+ t_nms = time_synchronized() - t
254
+ if batch_i > 0:
255
+ T_nms.update(t_nms/img.size(0),img.size(0))
256
+
257
+ if config.TEST.PLOTS:
258
+ if batch_i == 0:
259
+ for i in range(test_batch_size):
260
+ img_test = cv2.imread(paths[i])
261
+ da_seg_mask = da_seg_out[i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
262
+ da_seg_mask = torch.nn.functional.interpolate(da_seg_mask, scale_factor=int(1/ratio), mode='bilinear')
263
+ _, da_seg_mask = torch.max(da_seg_mask, 1)
264
+
265
+ da_gt_mask = target[1][i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
266
+ da_gt_mask = torch.nn.functional.interpolate(da_gt_mask, scale_factor=int(1/ratio), mode='bilinear')
267
+ _, da_gt_mask = torch.max(da_gt_mask, 1)
268
+
269
+ da_seg_mask = da_seg_mask.int().squeeze().cpu().numpy()
270
+ da_gt_mask = da_gt_mask.int().squeeze().cpu().numpy()
271
+ # seg_mask = seg_mask > 0.5
272
+ # plot_img_and_mask(img_test, seg_mask, i,epoch,save_dir)
273
+ img_test1 = img_test.copy()
274
+ _ = show_seg_result(img_test, da_seg_mask, i,epoch,save_dir)
275
+ _ = show_seg_result(img_test1, da_gt_mask, i, epoch, save_dir, is_gt=True)
276
+
277
+ img_ll = cv2.imread(paths[i])
278
+ ll_seg_mask = ll_seg_out[i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
279
+ ll_seg_mask = torch.nn.functional.interpolate(ll_seg_mask, scale_factor=int(1/ratio), mode='bilinear')
280
+ _, ll_seg_mask = torch.max(ll_seg_mask, 1)
281
+
282
+ ll_gt_mask = target[2][i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
283
+ ll_gt_mask = torch.nn.functional.interpolate(ll_gt_mask, scale_factor=int(1/ratio), mode='bilinear')
284
+ _, ll_gt_mask = torch.max(ll_gt_mask, 1)
285
+
286
+ ll_seg_mask = ll_seg_mask.int().squeeze().cpu().numpy()
287
+ ll_gt_mask = ll_gt_mask.int().squeeze().cpu().numpy()
288
+ # seg_mask = seg_mask > 0.5
289
+ # plot_img_and_mask(img_test, seg_mask, i,epoch,save_dir)
290
+ img_ll1 = img_ll.copy()
291
+ _ = show_seg_result(img_ll, ll_seg_mask, i,epoch,save_dir, is_ll=True)
292
+ _ = show_seg_result(img_ll1, ll_gt_mask, i, epoch, save_dir, is_ll=True, is_gt=True)
293
+
294
+ img_det = cv2.imread(paths[i])
295
+ img_gt = img_det.copy()
296
+ det = output[i].clone()
297
+ if len(det):
298
+ det[:,:4] = scale_coords(img[i].shape[1:],det[:,:4],img_det.shape).round()
299
+ for *xyxy,conf,cls in reversed(det):
300
+ #print(cls)
301
+ label_det_pred = f'{names[int(cls)]} {conf:.2f}'
302
+ plot_one_box(xyxy, img_det , label=label_det_pred, color=colors[int(cls)], line_thickness=3)
303
+ cv2.imwrite(save_dir+"/batch_{}_{}_det_pred.png".format(epoch,i),img_det)
304
+
305
+ labels = target[0][target[0][:, 0] == i, 1:]
306
+ # print(labels)
307
+ labels[:,1:5]=xywh2xyxy(labels[:,1:5])
308
+ if len(labels):
309
+ labels[:,1:5]=scale_coords(img[i].shape[1:],labels[:,1:5],img_gt.shape).round()
310
+ for cls,x1,y1,x2,y2 in labels:
311
+ #print(names)
312
+ #print(cls)
313
+ label_det_gt = f'{names[int(cls)]}'
314
+ xyxy = (x1,y1,x2,y2)
315
+ plot_one_box(xyxy, img_gt , label=label_det_gt, color=colors[int(cls)], line_thickness=3)
316
+ cv2.imwrite(save_dir+"/batch_{}_{}_det_gt.png".format(epoch,i),img_gt)
317
+
318
+ # Statistics per image
319
+ # output([xyxy,conf,cls])
320
+ # target[0] ([img_id,cls,xyxy])
321
+ for si, pred in enumerate(output):
322
+ labels = target[0][target[0][:, 0] == si, 1:] #all object in one image
323
+ nl = len(labels) # num of object
324
+ tcls = labels[:, 0].tolist() if nl else [] # target class
325
+ path = Path(paths[si])
326
+ seen += 1
327
+
328
+ if len(pred) == 0:
329
+ if nl:
330
+ stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
331
+ continue
332
+
333
+ # Predictions
334
+ predn = pred.clone()
335
+ scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred
336
+
337
+ # Append to text file
338
+ if config.TEST.SAVE_TXT:
339
+ gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh
340
+ for *xyxy, conf, cls in predn.tolist():
341
+ xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
342
+ line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
343
+ with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
344
+ f.write(('%g ' * len(line)).rstrip() % line + '\n')
345
+
346
+ # W&B logging
347
+ if config.TEST.PLOTS and len(wandb_images) < log_imgs:
348
+ box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
349
+ "class_id": int(cls),
350
+ "box_caption": "%s %.3f" % (names[cls], conf),
351
+ "scores": {"class_score": conf},
352
+ "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
353
+ boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
354
+ wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name))
355
+
356
+ # Append to pycocotools JSON dictionary
357
+ if config.TEST.SAVE_JSON:
358
+ # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
359
+ image_id = int(path.stem) if path.stem.isnumeric() else path.stem
360
+ box = xyxy2xywh(predn[:, :4]) # xywh
361
+ box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
362
+ for p, b in zip(pred.tolist(), box.tolist()):
363
+ jdict.append({'image_id': image_id,
364
+ 'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
365
+ 'bbox': [round(x, 3) for x in b],
366
+ 'score': round(p[4], 5)})
367
+
368
+
369
+ # Assign all predictions as incorrect
370
+ correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
371
+ if nl:
372
+ detected = [] # target indices
373
+ tcls_tensor = labels[:, 0]
374
+
375
+ # target boxes
376
+ tbox = xywh2xyxy(labels[:, 1:5])
377
+ scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels
378
+ if config.TEST.PLOTS:
379
+ confusion_matrix.process_batch(pred, torch.cat((labels[:, 0:1], tbox), 1))
380
+
381
+ # Per target class
382
+ for cls in torch.unique(tcls_tensor):
383
+ ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # target indices
384
+ pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # prediction indices
385
+
386
+ # Search for detections
387
+ if pi.shape[0]:
388
+ # Prediction to target ious
389
+ # n*m n:pred m:label
390
+ ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1) # best ious, indices
391
+ # Append detections
392
+ detected_set = set()
393
+ for j in (ious > iouv[0]).nonzero(as_tuple=False):
394
+ d = ti[i[j]] # detected target
395
+ if d.item() not in detected_set:
396
+ detected_set.add(d.item())
397
+ detected.append(d)
398
+ correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn
399
+ if len(detected) == nl: # all targets already located in image
400
+ break
401
+
402
+ # Append statistics (correct, conf, pcls, tcls)
403
+ stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
404
+
405
+ if config.TEST.PLOTS and batch_i < 3:
406
+ f = save_dir +'/'+ f'test_batch{batch_i}_labels.jpg' # labels
407
+ #Thread(target=plot_images, args=(img, target[0], paths, f, names), daemon=True).start()
408
+ f = save_dir +'/'+ f'test_batch{batch_i}_pred.jpg' # predictions
409
+ #Thread(target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True).start()
410
+
411
+ # Compute statistics
412
+ # stats : [[all_img_correct]...[all_img_tcls]]
413
+ stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy zip(*) :unzip
414
+
415
+ map70 = None
416
+ map75 = None
417
+ if len(stats) and stats[0].any():
418
+ p, r, ap, f1, ap_class = ap_per_class(*stats, plot=False, save_dir=save_dir, names=names)
419
+ ap50, ap70, ap75,ap = ap[:, 0], ap[:,4], ap[:,5],ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95]
420
+ mp, mr, map50, map70, map75, map = p.mean(), r.mean(), ap50.mean(), ap70.mean(),ap75.mean(),ap.mean()
421
+ nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
422
+ else:
423
+ nt = torch.zeros(1)
424
+
425
+ # Print results
426
+ pf = '%20s' + '%12.3g' * 6 # print format
427
+ print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
428
+ #print(map70)
429
+ #print(map75)
430
+
431
+ # Print results per class
432
+ if (verbose or (nc <= 20 and not training)) and nc > 1 and len(stats):
433
+ for i, c in enumerate(ap_class):
434
+ print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
435
+
436
+ # Print speeds
437
+ t = tuple(x / seen * 1E3 for x in (t_inf, t_nms, t_inf + t_nms)) + (imgsz, imgsz, batch_size) # tuple
438
+ if not training:
439
+ print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
440
+
441
+ # Plots
442
+ if config.TEST.PLOTS:
443
+ confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
444
+ if wandb and wandb.run:
445
+ wandb.log({"Images": wandb_images})
446
+ wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]})
447
+
448
+ # Save JSON
449
+ if config.TEST.SAVE_JSON and len(jdict):
450
+ w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights
451
+ anno_json = '../coco/annotations/instances_val2017.json' # annotations json
452
+ pred_json = str(save_dir / f"{w}_predictions.json") # predictions json
453
+ print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
454
+ with open(pred_json, 'w') as f:
455
+ json.dump(jdict, f)
456
+
457
+ try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
458
+ from pycocotools.coco import COCO
459
+ from pycocotools.cocoeval import COCOeval
460
+
461
+ anno = COCO(anno_json) # init annotations api
462
+ pred = anno.loadRes(pred_json) # init predictions api
463
+ eval = COCOeval(anno, pred, 'bbox')
464
+ if is_coco:
465
+ eval.params.imgIds = [int(Path(x).stem) for x in val_loader.dataset.img_files] # image IDs to evaluate
466
+ eval.evaluate()
467
+ eval.accumulate()
468
+ eval.summarize()
469
+ map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
470
+ except Exception as e:
471
+ print(f'pycocotools unable to run: {e}')
472
+
473
+ # Return results
474
+ if not training:
475
+ s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if config.TEST.SAVE_TXT else ''
476
+ print(f"Results saved to {save_dir}{s}")
477
+ model.float() # for training
478
+ maps = np.zeros(nc) + map
479
+ for i, c in enumerate(ap_class):
480
+ maps[c] = ap[i]
481
+
482
+ da_segment_result = (da_acc_seg.avg,da_IoU_seg.avg,da_mIoU_seg.avg)
483
+ ll_segment_result = (ll_acc_seg.avg,ll_IoU_seg.avg,ll_mIoU_seg.avg)
484
+
485
+ # print(da_segment_result)
486
+ # print(ll_segment_result)
487
+ detect_result = np.asarray([mp, mr, map50, map])
488
+ # print('mp:{},mr:{},map50:{},map:{}'.format(mp, mr, map50, map))
489
+ # print segment_result
490
+ t = [T_inf.avg, T_nms.avg]
491
+ return da_segment_result, ll_segment_result, detect_result, losses.avg, maps, t
492
+
493
+
494
+
495
+ class AverageMeter(object):
496
+ """Computes and stores the average and current value"""
497
+ def __init__(self):
498
+ self.reset()
499
+
500
+ def reset(self):
501
+ self.val = 0
502
+ self.avg = 0
503
+ self.sum = 0
504
+ self.count = 0
505
+
506
+ def update(self, val, n=1):
507
+ self.val = val
508
+ self.sum += val * n
509
+ self.count += n
510
+ self.avg = self.sum / self.count if self.count != 0 else 0
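
A small, hypothetical example of the AverageMeter bookkeeping used throughout train() and validate(); the numbers are arbitrary:

    from lib.core.function import AverageMeter

    losses = AverageMeter()
    losses.update(0.9, n=16)    # batch of 16 images with mean loss 0.9
    losses.update(0.7, n=16)
    print(losses.val)           # 0.7 -> value from the most recent batch
    print(losses.avg)           # 0.8 -> (0.9*16 + 0.7*16) / 32
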
lib/core/general.py ADDED
@@ -0,0 +1,466 @@
1
+ import glob
2
+ import logging
3
+ import os
4
+ import platform
5
+ import random
6
+ import re
7
+ import shutil
8
+ import subprocess
9
+ import time
10
+ import torchvision
11
+ from contextlib import contextmanager
12
+ from copy import copy
13
+ from pathlib import Path
14
+
15
+ import cv2
16
+ import math
17
+ import matplotlib
18
+ import matplotlib.pyplot as plt
19
+ import numpy as np
20
+ import torch
21
+ import torch.nn as nn
22
+ import yaml
23
+ from PIL import Image
24
+ from scipy.cluster.vq import kmeans
25
+ from scipy.signal import butter, filtfilt
26
+ from tqdm import tqdm
27
+
28
+
29
+ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9):
30
+ # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
31
+ box2 = box2.T
32
+
33
+ # Get the coordinates of bounding boxes
34
+ if x1y1x2y2: # x1, y1, x2, y2 = box1
35
+ b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
36
+ b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
37
+ else: # transform from xywh to xyxy
38
+ b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
39
+ b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
40
+ b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
41
+ b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
42
+
43
+ # Intersection area
44
+ inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
45
+ (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
46
+
47
+ # Union Area
48
+ w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
49
+ w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
50
+ union = w1 * h1 + w2 * h2 - inter + eps
51
+
52
+ iou = inter / union
53
+ if GIoU or DIoU or CIoU:
54
+ cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width
55
+ ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height
56
+ if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
57
+ c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared
58
+ rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
59
+ (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared
60
+ if DIoU:
61
+ return iou - rho2 / c2 # DIoU
62
+ elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
63
+ v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
64
+ with torch.no_grad():
65
+ alpha = v / ((1 + eps) - iou + v)
66
+ return iou - (rho2 / c2 + v * alpha) # CIoU
67
+ else: # GIoU https://arxiv.org/pdf/1902.09630.pdf
68
+ c_area = cw * ch + eps # convex area
69
+ return iou - (c_area - union) / c_area # GIoU
70
+ else:
71
+ return iou # IoU
72
+
73
+
74
+ def box_iou(box1, box2):
75
+ # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
76
+ """
77
+ Return intersection-over-union (Jaccard index) of boxes.
78
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
79
+ Arguments:
80
+ box1 (Tensor[N, 4])
81
+ box2 (Tensor[M, 4])
82
+ Returns:
83
+ iou (Tensor[N, M]): the NxM matrix containing the pairwise
84
+ IoU values for every element in boxes1 and boxes2
85
+ """
86
+
87
+ def box_area(box):
88
+ # box = 4xn
89
+ return (box[2] - box[0]) * (box[3] - box[1]) #(x2-x1)*(y2-y1)
90
+
91
+ area1 = box_area(box1.T)
92
+ area2 = box_area(box2.T)
93
+
94
+ # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
95
+ inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
96
+ return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
97
+
98
+ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()):
99
+ """Performs Non-Maximum Suppression (NMS) on inference results
100
+
101
+ Returns:
102
+ detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
103
+ """
104
+
105
+ nc = prediction.shape[2] - 5 # number of classes
106
+ xc = prediction[..., 4] > conf_thres # candidates
107
+
108
+ # Settings
109
+ min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
110
+ max_det = 300 # maximum number of detections per image
111
+ max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
112
+ time_limit = 10.0 # seconds to quit after
113
+ redundant = True # require redundant detections
114
+ multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
115
+ merge = False # use merge-NMS
116
+
117
+ t = time.time()
118
+ output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
119
+ for xi, x in enumerate(prediction): # image index, image inference
120
+ # Apply constraints
121
+ # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
122
+ x = x[xc[xi]] # confidence
123
+
124
+ # Cat apriori labels if autolabelling
125
+ if labels and len(labels[xi]):
126
+ l = labels[xi]
127
+ v = torch.zeros((len(l), nc + 5), device=x.device)
128
+ v[:, :4] = l[:, 1:5] # box
129
+ v[:, 4] = 1.0 # conf
130
+ v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls
131
+ x = torch.cat((x, v), 0)
132
+
133
+ # If none remain process next image
134
+ if not x.shape[0]:
135
+ continue
136
+
137
+ # Compute conf
138
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
139
+
140
+ # Box (center x, center y, width, height) to (x1, y1, x2, y2)
141
+ box = xywh2xyxy(x[:, :4])
142
+
143
+ # Detections matrix nx6 (xyxy, conf, cls)
144
+ if multi_label:
145
+ i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
146
+ x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
147
+ else: # best class only
148
+ conf, j = x[:, 5:].max(1, keepdim=True)
149
+ x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
150
+
151
+ # Filter by class
152
+ if classes is not None:
153
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
154
+
155
+ # Apply finite constraint
156
+ # if not torch.isfinite(x).all():
157
+ # x = x[torch.isfinite(x).all(1)]
158
+
159
+ # Check shape
160
+ n = x.shape[0] # number of boxes
161
+ if not n: # no boxes
162
+ continue
163
+ elif n > max_nms: # excess boxes
164
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
165
+
166
+ # Batched NMS
167
+ c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
168
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
169
+ i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
170
+ if i.shape[0] > max_det: # limit detections
171
+ i = i[:max_det]
172
+ if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
173
+ # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
174
+ iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
175
+ weights = iou * scores[None] # box weights
176
+ x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
177
+ if redundant:
178
+ i = i[iou.sum(1) > 1] # require redundancy
179
+
180
+ output[xi] = x[i]
181
+ if (time.time() - t) > time_limit:
182
+ print(f'WARNING: NMS time limit {time_limit}s exceeded')
183
+ break # time limit exceeded
184
+
185
+ return output
186
+
187
+
188
+ def xywh2xyxy(x):
189
+ # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
190
+ y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
191
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
192
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
193
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
194
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
195
+ return y
196
+
197
+ def fitness(x):
198
+ # Returns fitness (for use with results.txt or evolve.txt)
199
+ w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
200
+ return (x[:, :4] * w).sum(1)
201
+
202
+ def check_img_size(img_size, s=32):
203
+ # Verify img_size is a multiple of stride s
204
+ new_size = make_divisible(img_size, int(s)) # ceil gs-multiple
205
+ if new_size != img_size:
206
+ print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
207
+ return new_size
208
+
209
+ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
210
+ # Rescale coords (xyxy) from img1_shape to img0_shape
211
+ if ratio_pad is None: # calculate from img0_shape
212
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
213
+ pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
214
+ else:
215
+ gain = ratio_pad[0][0]
216
+ pad = ratio_pad[1]
217
+
218
+ coords[:, [0, 2]] -= pad[0] # x padding
219
+ coords[:, [1, 3]] -= pad[1] # y padding
220
+ coords[:, :4] /= gain
221
+ clip_coords(coords, img0_shape)
222
+ return coords
223
+
224
+ def clip_coords(boxes, img_shape):
225
+ # Clip bounding xyxy bounding boxes to image shape (height, width)
226
+ boxes[:, 0].clamp_(0, img_shape[1]) # x1
227
+ boxes[:, 1].clamp_(0, img_shape[0]) # y1
228
+ boxes[:, 2].clamp_(0, img_shape[1]) # x2
229
+ boxes[:, 3].clamp_(0, img_shape[0]) # y2
230
+
231
+ def make_divisible(x, divisor):
232
+ # Returns x evenly divisible by divisor
233
+ return math.ceil(x / divisor) * divisor
234
+
235
+ def xyxy2xywh(x):
236
+ # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
237
+ y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
238
+ y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
239
+ y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
240
+ y[:, 2] = x[:, 2] - x[:, 0] # width
241
+ y[:, 3] = x[:, 3] - x[:, 1] # height
242
+ return y
243
+
244
+ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
245
+ # Plot image grid with labels
246
+
247
+ if isinstance(images, torch.Tensor):
248
+ images = images.cpu().float().numpy()
249
+ if isinstance(targets, torch.Tensor):
250
+ targets = targets.cpu().numpy()
251
+
252
+ # un-normalise
253
+ if np.max(images[0]) <= 1:
254
+ images *= 255
255
+
256
+ tl = 3 # line thickness
257
+ tf = max(tl - 1, 1) # font thickness
258
+ bs, _, h, w = images.shape # batch size, _, height, width
259
+ bs = min(bs, max_subplots) # limit plot images
260
+ ns = np.ceil(bs ** 0.5) # number of subplots (square)
261
+
262
+ # Check if we should resize
263
+ scale_factor = max_size / max(h, w)
264
+ if scale_factor < 1:
265
+ h = math.ceil(scale_factor * h)
266
+ w = math.ceil(scale_factor * w)
267
+
268
+ colors = color_list() # list of colors
269
+ mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init
270
+ for i, img in enumerate(images):
271
+ if i == max_subplots: # if last batch has fewer images than we expect
272
+ break
273
+
274
+ block_x = int(w * (i // ns))
275
+ block_y = int(h * (i % ns))
276
+
277
+ img = img.transpose(1, 2, 0)
278
+ if scale_factor < 1:
279
+ img = cv2.resize(img, (w, h))
280
+
281
+ mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
282
+ if len(targets) > 0:
283
+ image_targets = targets[targets[:, 0] == i]
284
+ boxes = xywh2xyxy(image_targets[:, 2:6]).T
285
+ classes = image_targets[:, 1].astype('int')
286
+ labels = image_targets.shape[1] == 6 # labels if no conf column
287
+ conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred)
288
+
289
+ if boxes.shape[1]:
290
+ if boxes.max() <= 1.01: # if normalized with tolerance 0.01
291
+ boxes[[0, 2]] *= w # scale to pixels
292
+ boxes[[1, 3]] *= h
293
+ elif scale_factor < 1: # absolute coords need scale if image scales
294
+ boxes *= scale_factor
295
+ boxes[[0, 2]] += block_x
296
+ boxes[[1, 3]] += block_y
297
+ for j, box in enumerate(boxes.T):
298
+ cls = int(classes[j])
299
+ color = colors[cls % len(colors)]
300
+ cls = names[cls] if names else cls
301
+ if labels or conf[j] > 0.25: # 0.25 conf thresh
302
+ label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j])
303
+ plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)
304
+
305
+ # Draw image filename labels
306
+ if paths:
307
+ label = Path(paths[i]).name[:40] # trim to 40 char
308
+ t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
309
+ cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf,
310
+ lineType=cv2.LINE_AA)
311
+
312
+ # Image border
313
+ cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)
314
+
315
+ if fname:
316
+ r = min(1280. / max(h, w) / ns, 1.0) # ratio to limit image size
317
+ mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA)
318
+ # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save
319
+ Image.fromarray(mosaic).save(fname) # PIL save
320
+ return mosaic
321
+
322
+ def plot_one_box(x, img, color=None, label=None, line_thickness=None):
323
+ # Plots one bounding box on image img
324
+ tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness
325
+ color = color or [random.randint(0, 255) for _ in range(3)]
326
+ c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
327
+ cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
328
+ if label:
329
+ tf = max(tl - 1, 1) # font thickness
330
+ t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
331
+ c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
332
+ cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled
333
+ cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
334
+
335
+ def color_list():
336
+ # Return first 10 plt colors as (r,g,b) https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb
337
+ def hex2rgb(h):
338
+ return tuple(int(str(h[1 + i:1 + i + 2]), 16) for i in (0, 2, 4))
339
+
340
+ return [hex2rgb(h) for h in plt.rcParams['axes.prop_cycle'].by_key()['color']]
341
+
342
+ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]):
343
+ """ Compute the average precision, given the recall and precision curves.
344
+ Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
345
+ # Arguments
346
+ tp: True positives (nparray, nx1 or nx10).
347
+ conf: Objectness value from 0-1 (nparray).
348
+ pred_cls: Predicted object classes (nparray).
349
+ target_cls: True object classes (nparray).
350
+ plot: Plot precision-recall curve at mAP@0.5
351
+ save_dir: Plot save directory
352
+ # Returns
353
+ The average precision as computed in py-faster-rcnn.
354
+ """
355
+
356
+ # Sort by objectness
357
+ i = np.argsort(-conf)
358
+ tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
359
+
360
+ # Find unique classes
361
+ unique_classes = np.unique(target_cls)
362
+
363
+ # Create Precision-Recall curve and compute AP for each class
364
+ px, py = np.linspace(0, 1, 1000), [] # for plotting
365
+ pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
366
+ s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
367
+ ap, p, r = np.zeros(s), np.zeros((unique_classes.shape[0], 1000)), np.zeros((unique_classes.shape[0], 1000))
368
+ for ci, c in enumerate(unique_classes):
369
+ i = pred_cls == c
370
+ n_l = (target_cls == c).sum() # number of labels
371
+ n_p = i.sum() # number of predictions
372
+
373
+ if n_p == 0 or n_l == 0:
374
+ continue
375
+ else:
376
+ # Accumulate FPs and TPs
377
+ fpc = (1 - tp[i]).cumsum(0)
378
+ tpc = tp[i].cumsum(0)
379
+
380
+ # Recall
381
+ recall = tpc / (n_l + 1e-16) # recall curve
382
+ r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases
383
+
384
+ # Precision
385
+ precision = tpc / (tpc + fpc) # precision curve
386
+ p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
387
+ # AP from recall-precision curve
388
+ for j in range(tp.shape[1]):
389
+ ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
390
+ if plot and (j == 0):
391
+ py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
392
+
393
+ # Compute F1 score (harmonic mean of precision and recall)
394
+ f1 = 2 * p * r / (p + r + 1e-16)
395
+ i=r.mean(0).argmax()
396
+
397
+ if plot:
398
+ plot_pr_curve(px, py, ap, save_dir, names)
399
+
400
+ return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32')
401
+
402
+ def compute_ap(recall, precision):
403
+ """ Compute the average precision, given the recall and precision curves.
404
+ Source: https://github.com/rbgirshick/py-faster-rcnn.
405
+ # Arguments
406
+ recall: The recall curve (list).
407
+ precision: The precision curve (list).
408
+ # Returns
409
+ The average precision as computed in py-faster-rcnn.
410
+ """
411
+
412
+ # Append sentinel values to beginning and end
413
+ mrec = np.concatenate(([0.], recall, [recall[-1] + 1E-3]))
414
+ mpre = np.concatenate(([1.], precision, [0.]))
415
+
416
+ # Compute the precision envelope
417
+ mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
418
+
419
+ # Integrate area under curve
420
+ method = 'interp' # methods: 'continuous', 'interp'
421
+ if method == 'interp':
422
+ x = np.linspace(0, 1, 101) # 101-point interp (COCO)
423
+ ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
424
+
425
+ else: # 'continuous'
426
+ i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
427
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
428
+
429
+ return ap, mpre, mrec
430
+
431
+ def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
432
+ # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
433
+ # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
434
+ # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
435
+ # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
436
+ # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet
437
+ x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
438
+ 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
439
+ 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
440
+ return x
441
+
442
+ def output_to_target(output):
443
+ # Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
444
+ targets = []
445
+ for i, o in enumerate(output):
446
+ for *box, conf, cls in o.cpu().numpy():
447
+ targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf])
448
+ return np.array(targets)
449
+
450
+ def plot_pr_curve(px, py, ap, save_dir='.', names=()):
451
+ fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
452
+ py = np.stack(py, axis=1)
453
+
454
+ if 0 < len(names) < 21: # show mAP in legend if < 21 classes
455
+ for i, y in enumerate(py.T):
456
+ ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, precision)
457
+ else:
458
+ ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision)
459
+
460
+ ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
461
+ ax.set_xlabel('Recall')
462
+ ax.set_ylabel('Precision')
463
+ ax.set_xlim(0, 1)
464
+ ax.set_ylim(0, 1)
465
+ plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
466
+ fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250)
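
To make the detection post-processing path concrete, here is a hedged sketch that runs non_max_suppression on a random dummy prediction tensor (1 image, 100 candidate boxes, 1 class); every value is illustrative and the import path simply follows this upload's layout:

    import torch
    from lib.core.general import non_max_suppression, xyxy2xywh

    # raw detection head output: [batch, n_boxes, 5 + nc] = [x, y, w, h, obj_conf, cls_conf, ...]
    pred = torch.rand(1, 100, 6)
    pred[..., :4] *= 640                   # pretend xywh is in pixels on a 640x640 image

    dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
    boxes = dets[0]                        # per-image tensor of shape (n_kept, 6)
    print(boxes.shape)                     # columns: x1, y1, x2, y2, conf, cls
    if len(boxes):
        print(xyxy2xywh(boxes[:, :4])[0])  # convert the first kept box back to xywh
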
lib/core/loss.py ADDED
@@ -0,0 +1,249 @@
1
+ import torch.nn as nn
2
+ import torch
3
+ from .general import bbox_iou
4
+ from .postprocess import build_targets
5
+ from lib.core.evaluate import SegmentationMetric
6
+
7
+ class MultiHeadLoss(nn.Module):
8
+ """
9
+ collect all the loss we need
10
+ """
11
+ def __init__(self, losses, cfg, lambdas=None):
12
+ """
13
+ Inputs:
14
+ - losses: (list)[nn.Module, nn.Module, ...]
15
+ - cfg: config object
16
+ - lambdas: (list) weights for each loss term [cls, obj, iou, da_seg, ll_seg, ll_iou]
17
+ """
18
+ super().__init__()
19
+ # lambdas: [cls, obj, iou, da_seg, ll_seg, ll_iou]
20
+ if not lambdas:
21
+ lambdas = [1.0 for _ in range(len(losses) + 3)]
22
+ assert all(lam >= 0.0 for lam in lambdas)
23
+
24
+ self.losses = nn.ModuleList(losses)
25
+ self.lambdas = lambdas
26
+ self.cfg = cfg
27
+
28
+ def forward(self, head_fields, head_targets, shapes, model):
29
+ """
30
+ Inputs:
31
+ - head_fields: (list) output from each task head
32
+ - head_targets: (list) ground-truth for each task head
33
+ - model:
34
+
35
+ Returns:
36
+ - total_loss: sum of all the loss
37
+ - head_losses: (tuple) contain all loss[loss1, loss2, ...]
38
+
39
+ """
40
+ # head_losses = [ll
41
+ # for l, f, t in zip(self.losses, head_fields, head_targets)
42
+ # for ll in l(f, t)]
43
+ #
44
+ # assert len(self.lambdas) == len(head_losses)
45
+ # loss_values = [lam * l
46
+ # for lam, l in zip(self.lambdas, head_losses)
47
+ # if l is not None]
48
+ # total_loss = sum(loss_values) if loss_values else None
49
+ # print(model.nc)
50
+ total_loss, head_losses = self._forward_impl(head_fields, head_targets, shapes, model)
51
+
52
+ return total_loss, head_losses
53
+
54
+ def _forward_impl(self, predictions, targets, shapes, model):
55
+ """
56
+
57
+ Args:
58
+ predictions: predicts of [[det_head1, det_head2, det_head3], drive_area_seg_head, lane_line_seg_head]
59
+ targets: gts [det_targets, segment_targets, lane_targets]
60
+ model:
61
+
62
+ Returns:
63
+ total_loss: sum of all the loss
64
+ head_losses: list containing losses
65
+
66
+ """
67
+ cfg = self.cfg
68
+ device = targets[0].device
69
+ lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
70
+ tcls, tbox, indices, anchors = build_targets(cfg, predictions[0], targets[0], model) # targets
71
+
72
+ # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
73
+ cp, cn = smooth_BCE(eps=0.0)
74
+
75
+ BCEcls, BCEobj, BCEseg = self.losses
76
+
77
+ # Calculate Losses
78
+ nt = 0 # number of targets
79
+ no = len(predictions[0]) # number of outputs
80
+ balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6
81
+
82
+ # calculate detection loss
83
+ for i, pi in enumerate(predictions[0]): # layer index, layer predictions
84
+ b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
85
+ tobj = torch.zeros_like(pi[..., 0], device=device) # target obj
86
+
87
+ n = b.shape[0] # number of targets
88
+ if n:
89
+ nt += n # cumulative targets
90
+ ps = pi[b, a, gj, gi] # prediction subset corresponding to targets
91
+
92
+ # Regression
93
+ pxy = ps[:, :2].sigmoid() * 2. - 0.5
94
+ pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
95
+ pbox = torch.cat((pxy, pwh), 1).to(device) # predicted box
96
+ iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target)
97
+ lbox += (1.0 - iou).mean() # iou loss
98
+
99
+ # Objectness
100
+ tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio
101
+
102
+ # Classification
103
+ # print(model.nc)
104
+ if model.nc > 1: # cls loss (only if multiple classes)
105
+ t = torch.full_like(ps[:, 5:], cn, device=device) # targets
106
+ t[range(n), tcls[i]] = cp
107
+ lcls += BCEcls(ps[:, 5:], t) # BCE
108
+ lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss
109
+
110
+ drive_area_seg_predicts = predictions[1].view(-1)
111
+ drive_area_seg_targets = targets[1].view(-1)
112
+ lseg_da = BCEseg(drive_area_seg_predicts, drive_area_seg_targets)
113
+
114
+ lane_line_seg_predicts = predictions[2].view(-1)
115
+ lane_line_seg_targets = targets[2].view(-1)
116
+ lseg_ll = BCEseg(lane_line_seg_predicts, lane_line_seg_targets)
117
+
118
+ metric = SegmentationMetric(2)
119
+ nb, _, height, width = targets[1].shape
120
+ pad_w, pad_h = shapes[0][1][1]
121
+ pad_w = int(pad_w)
122
+ pad_h = int(pad_h)
123
+ _,lane_line_pred=torch.max(predictions[2], 1)
124
+ _,lane_line_gt=torch.max(targets[2], 1)
125
+ lane_line_pred = lane_line_pred[:, pad_h:height-pad_h, pad_w:width-pad_w]
126
+ lane_line_gt = lane_line_gt[:, pad_h:height-pad_h, pad_w:width-pad_w]
127
+ metric.reset()
128
+ metric.addBatch(lane_line_pred.cpu(), lane_line_gt.cpu())
129
+ IoU = metric.IntersectionOverUnion()
130
+ liou_ll = 1 - IoU
131
+
132
+ s = 3 / no # output count scaling
133
+ lcls *= cfg.LOSS.CLS_GAIN * s * self.lambdas[0]
134
+ lobj *= cfg.LOSS.OBJ_GAIN * s * (1.4 if no == 4 else 1.) * self.lambdas[1]
135
+ lbox *= cfg.LOSS.BOX_GAIN * s * self.lambdas[2]
136
+
137
+ lseg_da *= cfg.LOSS.DA_SEG_GAIN * self.lambdas[3]
138
+ lseg_ll *= cfg.LOSS.LL_SEG_GAIN * self.lambdas[4]
139
+ liou_ll *= cfg.LOSS.LL_IOU_GAIN * self.lambdas[5]
140
+
141
+
142
+ if cfg.TRAIN.DET_ONLY or cfg.TRAIN.ENC_DET_ONLY:
143
+ lseg_da = 0 * lseg_da
144
+ lseg_ll = 0 * lseg_ll
145
+ liou_ll = 0 * liou_ll
146
+
147
+ if cfg.TRAIN.SEG_ONLY or cfg.TRAIN.ENC_SEG_ONLY:
148
+ lcls = 0 * lcls
149
+ lobj = 0 * lobj
150
+ lbox = 0 * lbox
151
+
152
+ if cfg.TRAIN.LANE_ONLY:
153
+ lcls = 0 * lcls
154
+ lobj = 0 * lobj
155
+ lbox = 0 * lbox
156
+ lseg_da = 0 * lseg_da
157
+
158
+ if cfg.TRAIN.DRIVABLE_ONLY:
159
+ lcls = 0 * lcls
160
+ lobj = 0 * lobj
161
+ lbox = 0 * lbox
162
+ lseg_ll = 0 * lseg_ll
163
+ liou_ll = 0 * liou_ll
164
+
165
+ loss = lbox + lobj + lcls + lseg_da + lseg_ll + liou_ll
166
+ # loss = lseg
167
+ # return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()
168
+
169
+ # return a detailed loss dictionary so each term can be logged to TensorBoard
170
+ loss_dict = {
171
+ 'box_loss': lbox.item(),
172
+ 'obj_loss': lobj.item(),
173
+ 'cls_loss': lcls.item(),
174
+ 'da_seg_loss': lseg_da.item(),
175
+ 'll_seg_loss': lseg_ll.item(),
176
+ 'll_iou_loss': liou_ll.item(),
177
+ 'total_loss': loss.item()
178
+ }
179
+
180
+ return loss, loss_dict
181
+
182
+
183
+ def get_loss(cfg, device):
184
+ """
185
+ get MultiHeadLoss
186
+
187
+ Inputs:
188
+ -cfg: configuration; the loss-related settings are used
189
+ (e.g. gains, positive weights, focal-loss gamma)
190
+ -device: cpu or gpu device
191
+
192
+ Returns:
193
+ -loss: (MultiHeadLoss)
194
+
195
+ """
196
+ # class loss criteria
197
+ BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([cfg.LOSS.CLS_POS_WEIGHT])).to(device)
198
+ # object loss criteria
199
+ BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([cfg.LOSS.OBJ_POS_WEIGHT])).to(device)
200
+ # segmentation loss criteria
201
+ BCEseg = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([cfg.LOSS.SEG_POS_WEIGHT])).to(device)
202
+ # Focal loss
203
+ gamma = cfg.LOSS.FL_GAMMA # focal loss gamma
204
+ if gamma > 0:
205
+ BCEcls, BCEobj = FocalLoss(BCEcls, gamma), FocalLoss(BCEobj, gamma)
206
+
207
+ loss_list = [BCEcls, BCEobj, BCEseg]
208
+ loss = MultiHeadLoss(loss_list, cfg=cfg, lambdas=cfg.LOSS.MULTI_HEAD_LAMBDA)
209
+ return loss
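A minimal, hedged sketch of how this factory is wired into a training step. The config import and the forward-call signature (predictions, targets, shapes, model) are inferred from the surrounding code; predictions, targets, shapes and model are placeholders produced by the model and dataloader at train time.

import torch
from lib.config import cfg              # assumed: repo-wide config object
from lib.core.loss import get_loss      # factory defined above

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = get_loss(cfg, device)       # MultiHeadLoss with BCE (optionally focal) terms

# predictions = model(inputs)  -> [det_outputs, da_seg_out, ll_seg_out]
# targets, shapes come from the dataloader batch (see AutoDriveDataset below)
total_loss, loss_dict = criterion(predictions, targets, shapes, model)
total_loss.backward()
print({k: round(v, 4) for k, v in loss_dict.items()})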
210
+
211
+ # example
212
+ # class L1_Loss(nn.Module)
213
+
214
+
215
+ def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
216
+ # return positive, negative label smoothing BCE targets
217
+ return 1.0 - 0.5 * eps, 0.5 * eps
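For reference, the smoothing simply moves the positive/negative BCE targets towards each other; with the eps=0.0 used in the forward pass above nothing changes:

>>> smooth_BCE(eps=0.1)
(0.95, 0.05)
>>> smooth_BCE(eps=0.0)   # value used in MultiHeadLoss above
(1.0, 0.0)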
218
+
219
+
220
+ class FocalLoss(nn.Module):
221
+ # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
222
+ def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
223
+ # alpha balance positive & negative samples
224
+ # gamma focus on difficult samples
225
+ super(FocalLoss, self).__init__()
226
+ self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
227
+ self.gamma = gamma
228
+ self.alpha = alpha
229
+ self.reduction = loss_fcn.reduction
230
+ self.loss_fcn.reduction = 'none' # required to apply FL to each element
231
+
232
+ def forward(self, pred, true):
233
+ loss = self.loss_fcn(pred, true)
234
+ # p_t = torch.exp(-loss)
235
+ # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
236
+
237
+ # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
238
+ pred_prob = torch.sigmoid(pred) # prob from logits
239
+ p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
240
+ alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
241
+ modulating_factor = (1.0 - p_t) ** self.gamma
242
+ loss *= alpha_factor * modulating_factor
243
+
244
+ if self.reduction == 'mean':
245
+ return loss.mean()
246
+ elif self.reduction == 'sum':
247
+ return loss.sum()
248
+ else: # 'none'
249
+ return loss
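A small self-contained check of the wrapper above on synthetic logits; only FocalLoss itself comes from this file, the tensors are made up:

import torch
import torch.nn as nn
from lib.core.loss import FocalLoss

logits = torch.randn(8, 4)                      # synthetic predictions
labels = torch.randint(0, 2, (8, 4)).float()    # synthetic binary targets

plain = nn.BCEWithLogitsLoss()(logits, labels)
focal = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5, alpha=0.25)
print(plain.item(), focal(logits, labels).item())   # the focal term down-weights easy examples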
lib/core/postprocess.py ADDED
@@ -0,0 +1,225 @@
1
+ import torch
2
+ from lib.utils import is_parallel
3
+ import numpy as np
4
+ np.set_printoptions(threshold=np.inf)
5
+ import cv2
6
+ from sklearn.cluster import DBSCAN
7
+
8
+
9
+ def build_targets(cfg, predictions, targets, model):
10
+ '''
11
+ predictions
12
+ [16, 3, 32, 32, 85]
13
+ [16, 3, 16, 16, 85]
14
+ [16, 3, 8, 8, 85]
15
+ torch.tensor(predictions[i].shape)[[3, 2, 3, 2]]
16
+ [32,32,32,32]
17
+ [16,16,16,16]
18
+ [8,8,8,8]
19
+ targets[3,x,7]
20
+ t [index, class, x, y, w, h, head_index]
21
+ '''
22
+ # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
23
+ det = model.module.model[model.module.detector_index] if is_parallel(model) \
24
+ else model.model[model.detector_index] # Detect() module
25
+ # print(type(model))
26
+ # det = model.model[model.detector_index]
27
+ # print(type(det))
28
+ na, nt = det.na, targets.shape[0] # number of anchors, targets
29
+ tcls, tbox, indices, anch = [], [], [], []
30
+ gain = torch.ones(7, device=targets.device) # normalized to gridspace gain
31
+ ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
32
+ targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices
33
+
34
+ g = 0.5 # bias
35
+ off = torch.tensor([[0, 0],
36
+ [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m
37
+ # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
38
+ ], device=targets.device).float() * g # offsets
39
+
40
+ for i in range(det.nl):
41
+ anchors = det.anchors[i] #[3,2]
42
+ gain[2:6] = torch.tensor(predictions[i].shape)[[3, 2, 3, 2]] # xyxy gain
43
+ # Match targets to anchors
44
+ t = targets * gain
45
+
46
+ if nt:
47
+ # Matches
48
+ r = t[:, :, 4:6] / anchors[:, None] # wh ratio
49
+ j = torch.max(r, 1. / r).max(2)[0] < cfg.TRAIN.ANCHOR_THRESHOLD # compare
50
+ # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
51
+ t = t[j] # filter
52
+
53
+ # Offsets
54
+ gxy = t[:, 2:4] # grid xy
55
+ gxi = gain[[2, 3]] - gxy # inverse
56
+ j, k = ((gxy % 1. < g) & (gxy > 1.)).T
57
+ l, m = ((gxi % 1. < g) & (gxi > 1.)).T
58
+ j = torch.stack((torch.ones_like(j), j, k, l, m))
59
+ t = t.repeat((5, 1, 1))[j]
60
+ offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
61
+ else:
62
+ t = targets[0]
63
+ offsets = 0
64
+
65
+ # Define
66
+ b, c = t[:, :2].long().T # image, class
67
+ gxy = t[:, 2:4] # grid xy
68
+ gwh = t[:, 4:6] # grid wh
69
+ gij = (gxy - offsets).long()
70
+ gi, gj = gij.T # grid xy indices
71
+
72
+ # Append
73
+ a = t[:, 6].long() # anchor indices
74
+ indices.append((b, a, gj.clamp_(0, int(gain[3]) - 1), gi.clamp_(0, int(gain[2]) - 1))) # image, anchor, grid indices
75
+ tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
76
+ anch.append(anchors[a]) # anchors
77
+ tcls.append(c) # class
78
+
79
+ return tcls, tbox, indices, anch
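The anchor-matching rule at the heart of build_targets can be reproduced in isolation: a target is assigned to an anchor when the largest width/height ratio between them stays below ANCHOR_THRESHOLD. The threshold value 4.0 below is only an illustrative assumption and the tensors are synthetic:

import torch

anchors = torch.tensor([[10., 13.], [33., 23.], [62., 45.]])   # (na, 2) anchor sizes in grid units
t_wh = torch.tensor([[12., 14.], [150., 40.]])                 # (nt, 2) target sizes in grid units
thr = 4.0                                                      # stands in for cfg.TRAIN.ANCHOR_THRESHOLD

r = t_wh[None, :, :] / anchors[:, None, :]        # wh ratio, shape (na, nt, 2)
keep = torch.max(r, 1. / r).max(2)[0] < thr       # (na, nt) bool: which anchor keeps which target
print(keep)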
80
+
81
+ def morphological_process(image, kernel_size=5, func_type=cv2.MORPH_CLOSE):
82
+ """
83
+ morphological processing to fill holes in the binary segmentation result
85
+ :param image: single-channel binary segmentation mask
86
+ :param kernel_size: size of the elliptical structuring element
87
+ :param func_type: morphological operation type (default cv2.MORPH_CLOSE)
+ :return: the processed mask
87
+ """
88
+ if len(image.shape) == 3:
89
+ raise ValueError('Binary segmentation result image should be a single channel image')
90
+
91
+ if image.dtype != np.uint8:
92
+ image = np.array(image, np.uint8)
93
+
94
+ kernel = cv2.getStructuringElement(shape=cv2.MORPH_ELLIPSE, ksize=(kernel_size, kernel_size))
95
+
96
+ # closing operation fills small holes
97
+ closing = cv2.morphologyEx(image, func_type, kernel, iterations=1)
98
+
99
+ return closing
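A quick, self-contained check of the closing step on a synthetic mask with a small hole; only morphological_process is taken from this module:

import cv2
import numpy as np
from lib.core.postprocess import morphological_process

mask = np.zeros((64, 64), dtype=np.uint8)
cv2.rectangle(mask, (10, 10), (50, 50), 255, -1)   # filled square blob
mask[30:33, 30:33] = 0                             # punch a small 3x3 hole

closed = morphological_process(mask, kernel_size=5, func_type=cv2.MORPH_CLOSE)
print(int((mask == 0)[10:51, 10:51].sum()), int((closed == 0)[10:51, 10:51].sum()))   # 9 -> 0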
100
+
101
+ def connect_components_analysis(image):
102
+ """
103
+ connected components analysis of the binary segmentation result
104
+ :param image: binary segmentation mask (single- or three-channel)
105
+ :return: output of cv2.connectedComponentsWithStats
106
+ """
107
+ if len(image.shape) == 3:
108
+ gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
109
+ else:
110
+ gray_image = image
111
+ # print(gray_image.dtype)
112
+ return cv2.connectedComponentsWithStats(gray_image, connectivity=8, ltype=cv2.CV_32S)
113
+
114
+ def if_y(samples_x):
115
+ for sample_x in samples_x:
116
+ if len(sample_x):
117
+ # if len(sample_x) != (sample_x[-1] - sample_x[0] + 1) or sample_x[-1] == sample_x[0]:
118
+ if sample_x[-1] == sample_x[0]:
119
+ return False
120
+ return True
121
+
122
+ def fitlane(mask, sel_labels, labels, stats):
123
+ H, W = mask.shape
124
+ for label_group in sel_labels:
125
+ states = [stats[k] for k in label_group]
126
+ x, y, w, h, _ = states[0]
127
+ # if len(label_group) > 1:
128
+ # print('in')
129
+ # for m in range(len(label_group)-1):
130
+ # labels[labels == label_group[m+1]] = label_group[0]
131
+ t = label_group[0]
132
+ # samples_y = np.linspace(y, H-1, 30)
133
+ # else:
134
+ samples_y = np.linspace(y, y+h-1, 30)
135
+
136
+ samples_x = [np.where(labels[int(sample_y)]==t)[0] for sample_y in samples_y]
137
+
138
+ if if_y(samples_x):
139
+ samples_x = [int(np.mean(sample_x)) if len(sample_x) else -1 for sample_x in samples_x]
140
+ samples_x = np.array(samples_x)
141
+ samples_y = np.array(samples_y)
142
+ samples_y = samples_y[samples_x != -1]
143
+ samples_x = samples_x[samples_x != -1]
144
+ func = np.polyfit(samples_y, samples_x, 2)
145
+ x_limits = np.polyval(func, H-1)
146
+ # if (y_max + h - 1) >= 720:
147
+ if x_limits < 0 or x_limits > W:
148
+ # if (y_max + h - 1) > 720:
149
+ # draw_y = np.linspace(y, 720-1, 720-y)
150
+ draw_y = np.linspace(y, y+h-1, h)
151
+ else:
152
+ # draw_y = np.linspace(y, y+h-1, y+h-y)
153
+ draw_y = np.linspace(y, H-1, H-y)
154
+ draw_x = np.polyval(func, draw_y)
155
+ # draw_y = draw_y[draw_x < W]
156
+ # draw_x = draw_x[draw_x < W]
157
+ draw_points = (np.asarray([draw_x, draw_y]).T).astype(np.int32)
158
+ cv2.polylines(mask, [draw_points], False, 1, thickness=15)
159
+ else:
160
+ # if ( + w - 1) >= 1280:
161
+ samples_x = np.linspace(x, W-1, 30)
162
+ # else:
163
+ # samples_x = np.linspace(x, x_max+w-1, 30)
164
+ samples_y = [np.where(labels[:, int(sample_x)]==t)[0] for sample_x in samples_x]
165
+ samples_y = [int(np.mean(sample_y)) if len(sample_y) else -1 for sample_y in samples_y]
166
+ samples_x = np.array(samples_x)
167
+ samples_y = np.array(samples_y)
168
+ samples_x = samples_x[samples_y != -1]
169
+ samples_y = samples_y[samples_y != -1]
170
+ try:
171
+ func = np.polyfit(samples_x, samples_y, 2)
172
+ except Exception:
173
+ continue  # skip this component if the polynomial fit fails
174
+ # y_limits = np.polyval(func, 0)
175
+ # if y_limits > 720 or y_limits < 0:
176
+ # if (x + w - 1) >= 1280:
177
+ # draw_x = np.linspace(x, 1280-1, 1280-x)
178
+ # else:
179
+ y_limits = np.polyval(func, 0)
180
+ if y_limits >= H or y_limits < 0:
181
+ draw_x = np.linspace(x, x+w-1, w)
182
+ else:
183
+ y_limits = np.polyval(func, W-1)
184
+ if y_limits >= H or y_limits < 0:
185
+ draw_x = np.linspace(x, x+w-1, w)
186
+ # if x+w-1 < 640:
187
+ # draw_x = np.linspace(0, x+w-1, w+x-x)
188
+ else:
189
+ draw_x = np.linspace(x, W-1, W-x)
190
+ draw_y = np.polyval(func, draw_x)
191
+ draw_points = (np.asarray([draw_x, draw_y]).T).astype(np.int32)
192
+ cv2.polylines(mask, [draw_points], False, 1, thickness=15)
193
+ return mask
194
+
195
+ def connect_lane(image, shadow_height=0):
196
+ if len(image.shape) == 3:
197
+ gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
198
+ else:
199
+ gray_image = image
200
+ if shadow_height:
201
+ image[:shadow_height] = 0
202
+ mask = np.zeros((image.shape[0], image.shape[1]), np.uint8)
203
+
204
+ num_labels, labels, stats, centers = cv2.connectedComponentsWithStats(gray_image, connectivity=8, ltype=cv2.CV_32S)
205
+ # ratios = []
206
+ selected_label = []
207
+
208
+ for t in range(1, num_labels, 1):
209
+ _, _, _, _, area = stats[t]
210
+ if area > 400:
211
+ selected_label.append(t)
212
+ if len(selected_label) == 0:
213
+ return mask
214
+ else:
215
+ split_labels = [[label,] for label in selected_label]
216
+ mask_post = fitlane(mask, split_labels, labels, stats)
217
+ return mask_post
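A hedged end-to-end sketch of the lane post-processing on a synthetic binary lane mask (components smaller than 400 px are discarded by connect_lane, so the drawn line is made thick enough to survive):

import cv2
import numpy as np
from lib.core.postprocess import connect_lane, morphological_process

ll_mask = np.zeros((256, 256), dtype=np.uint8)
cv2.line(ll_mask, (40, 255), (120, 0), 255, thickness=5)   # one slanted "lane" blob, well above 400 px

ll_mask = morphological_process(ll_mask, kernel_size=5, func_type=cv2.MORPH_CLOSE)
fitted = connect_lane(ll_mask, shadow_height=0)
print(fitted.shape, int(fitted.sum()))   # fitted polyline re-drawn with value 1 on a fresh mask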
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
lib/dataset/AutoDriveDataset.py ADDED
@@ -0,0 +1,264 @@
1
+ import cv2
2
+ import numpy as np
3
+ # np.set_printoptions(threshold=np.inf)
4
+ import random
5
+ import torch
6
+ import torchvision.transforms as transforms
7
+ # from visualization import plot_img_and_mask,plot_one_box,show_seg_result
8
+ from pathlib import Path
9
+ from PIL import Image
10
+ from torch.utils.data import Dataset
11
+ from ..utils import letterbox, augment_hsv, random_perspective, xyxy2xywh, cutout
12
+
13
+
14
+ class AutoDriveDataset(Dataset):
15
+ """
16
+ A general Dataset providing functionality shared by the concrete datasets
17
+ """
18
+ def __init__(self, cfg, is_train, inputsize=640, transform=None):
19
+ """
20
+ initialize all the characteristics
21
+
22
+ Inputs:
23
+ -cfg: configurations
24
+ -is_train(bool): whether train set or not
25
+ -transform: ToTensor and Normalize
26
+
27
+ Returns:
28
+ None
29
+ """
30
+ self.is_train = is_train
31
+ self.cfg = cfg
32
+ self.transform = transform
33
+ self.inputsize = inputsize
34
+ self.Tensor = transforms.ToTensor()
35
+ img_root = Path(cfg.DATASET.DATAROOT)
36
+ label_root = Path(cfg.DATASET.LABELROOT)
37
+ mask_root = Path(cfg.DATASET.MASKROOT)
38
+ lane_root = Path(cfg.DATASET.LANEROOT)
39
+ if is_train:
40
+ indicator = cfg.DATASET.TRAIN_SET
41
+ else:
42
+ indicator = cfg.DATASET.TEST_SET
43
+ self.img_root = img_root / indicator
44
+ self.label_root = label_root / indicator
45
+ self.mask_root = mask_root / indicator
46
+ self.lane_root = lane_root / indicator
47
+ # self.label_list = self.label_root.iterdir()
48
+ self.mask_list = self.mask_root.iterdir()
49
+
50
+ self.db = []
51
+
52
+ self.data_format = cfg.DATASET.DATA_FORMAT
53
+
54
+ self.scale_factor = cfg.DATASET.SCALE_FACTOR
55
+ self.rotation_factor = cfg.DATASET.ROT_FACTOR
56
+ self.flip = cfg.DATASET.FLIP
57
+ self.color_rgb = cfg.DATASET.COLOR_RGB
58
+
59
+ # self.target_type = cfg.MODEL.TARGET_TYPE
60
+ self.shapes = np.array(cfg.DATASET.ORG_IMG_SIZE)
61
+
62
+ def _get_db(self):
63
+ """
64
+ implemented in the child Dataset (for a dataset that is not in BDD100K format, override this in the child Dataset)
65
+ """
66
+ raise NotImplementedError
67
+
68
+ def evaluate(self, cfg, preds, output_dir):
69
+ """
70
+ implemented in the child dataset
71
+ """
72
+ raise NotImplementedError
73
+
74
+ def __len__(self,):
75
+ """
76
+ number of objects in the dataset
77
+ """
78
+ return len(self.db)
79
+
80
+ def __getitem__(self, idx):
81
+ """
82
+ Get input and ground-truth from the database & apply data augmentation to the input
83
+
84
+ Inputs:
85
+ -idx: the index of image in self.db(database)(list)
86
+ self.db(list) [a,b,c,...]
87
+ a: (dictionary){'image':, 'information':}
88
+
89
+ Returns:
90
+ -image: transformed image; data augmentation is applied first in __getitem__ (numpy array), then self.transform
91
+ -target: ground truth (det_gt, seg_gt, lane_gt)
92
+
93
+ functions that may be useful:
94
+ cv2.imread
95
+ cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
96
+ cv2.warpAffine
97
+ """
98
+ data = self.db[idx]
99
+ img = cv2.imread(data["image"], cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
100
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
101
+ # seg_label = cv2.imread(data["mask"], 0)
102
+ if self.cfg.num_seg_class == 3:
103
+ seg_label = cv2.imread(data["mask"])
104
+ else:
105
+ seg_label = cv2.imread(data["mask"], 0)
106
+ lane_label = cv2.imread(data["lane"], 0)
107
+ #print(lane_label.shape)
108
+ # print(seg_label.shape)
109
+ # print(lane_label.shape)
110
+ # print(seg_label.shape)
111
+ resized_shape = self.inputsize
112
+ if isinstance(resized_shape, list):
113
+ resized_shape = max(resized_shape)
114
+ h0, w0 = img.shape[:2] # orig hw
115
+ r = resized_shape / max(h0, w0) # resize image to img_size
116
+ if r != 1: # always resize down, only resize up if training with augmentation
117
+ interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
118
+ img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
119
+ seg_label = cv2.resize(seg_label, (int(w0 * r), int(h0 * r)), interpolation=interp)
120
+ lane_label = cv2.resize(lane_label, (int(w0 * r), int(h0 * r)), interpolation=interp)
121
+ h, w = img.shape[:2]
122
+
123
+ (img, seg_label, lane_label), ratio, pad = letterbox((img, seg_label, lane_label), resized_shape, auto=True, scaleup=self.is_train)
124
+ shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
125
+ # ratio = (w / w0, h / h0)
126
+ # print(resized_shape)
127
+
128
+ det_label = data["label"]
129
+ labels=[]
130
+
131
+ if det_label.size > 0:
132
+ # Normalized xywh to pixel xyxy format
133
+ labels = det_label.copy()
134
+ labels[:, 1] = ratio[0] * w * (det_label[:, 1] - det_label[:, 3] / 2) + pad[0] # pad width
135
+ labels[:, 2] = ratio[1] * h * (det_label[:, 2] - det_label[:, 4] / 2) + pad[1] # pad height
136
+ labels[:, 3] = ratio[0] * w * (det_label[:, 1] + det_label[:, 3] / 2) + pad[0]
137
+ labels[:, 4] = ratio[1] * h * (det_label[:, 2] + det_label[:, 4] / 2) + pad[1]
138
+
139
+ if self.is_train:
140
+ combination = (img, seg_label, lane_label)
141
+ (img, seg_label, lane_label), labels = random_perspective(
142
+ combination=combination,
143
+ targets=labels,
144
+ degrees=self.cfg.DATASET.ROT_FACTOR,
145
+ translate=self.cfg.DATASET.TRANSLATE,
146
+ scale=self.cfg.DATASET.SCALE_FACTOR,
147
+ shear=self.cfg.DATASET.SHEAR
148
+ )
149
+ #print(labels.shape)
150
+ augment_hsv(img, hgain=self.cfg.DATASET.HSV_H, sgain=self.cfg.DATASET.HSV_S, vgain=self.cfg.DATASET.HSV_V)
151
+ # img, seg_label, labels = cutout(combination=combination, labels=labels)
152
+
153
+ if len(labels):
154
+ # convert xyxy to xywh
155
+ labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
156
+
157
+ # Normalize coordinates 0 - 1
158
+ labels[:, [2, 4]] /= img.shape[0] # height
159
+ labels[:, [1, 3]] /= img.shape[1] # width
160
+
161
+ # if self.is_train:
162
+ # random left-right flip
163
+ lr_flip = True
164
+ if lr_flip and random.random() < 0.5:
165
+ img = np.fliplr(img)
166
+ seg_label = np.fliplr(seg_label)
167
+ lane_label = np.fliplr(lane_label)
168
+ if len(labels):
169
+ labels[:, 1] = 1 - labels[:, 1]
170
+
171
+ # random up-down flip
172
+ ud_flip = False
173
+ if ud_flip and random.random() < 0.5:
174
+ img = np.flipud(img)
175
+ seg_label = np.flipud(seg_label)
176
+ lane_label = np.flipud(lane_label)
177
+ if len(labels):
178
+ labels[:, 2] = 1 - labels[:, 2]
179
+
180
+ else:
181
+ if len(labels):
182
+ # convert xyxy to xywh
183
+ labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
184
+
185
+ # Normalize coordinates 0 - 1
186
+ labels[:, [2, 4]] /= img.shape[0] # height
187
+ labels[:, [1, 3]] /= img.shape[1] # width
188
+
189
+ labels_out = torch.zeros((len(labels), 6))
190
+ if len(labels):
191
+ labels_out[:, 1:] = torch.from_numpy(labels)
192
+ # Convert
193
+ # img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
194
+ # img = img.transpose(2, 0, 1)
195
+ img = np.ascontiguousarray(img)
196
+ # seg_label = np.ascontiguousarray(seg_label)
197
+ # if idx == 0:
198
+ # print(seg_label[:,:,0])
199
+
200
+ if self.cfg.num_seg_class == 3:
201
+ _,seg0 = cv2.threshold(seg_label[:,:,0],128,255,cv2.THRESH_BINARY)
202
+ _,seg1 = cv2.threshold(seg_label[:,:,1],1,255,cv2.THRESH_BINARY)
203
+ _,seg2 = cv2.threshold(seg_label[:,:,2],1,255,cv2.THRESH_BINARY)
204
+ else:
205
+ _,seg1 = cv2.threshold(seg_label,1,255,cv2.THRESH_BINARY)
206
+ _,seg2 = cv2.threshold(seg_label,1,255,cv2.THRESH_BINARY_INV)
207
+ _,lane1 = cv2.threshold(lane_label,1,255,cv2.THRESH_BINARY)
208
+ _,lane2 = cv2.threshold(lane_label,1,255,cv2.THRESH_BINARY_INV)
209
+ # _,seg2 = cv2.threshold(seg_label[:,:,2],1,255,cv2.THRESH_BINARY)
210
+ # # seg1[cutout_mask] = 0
211
+ # # seg2[cutout_mask] = 0
212
+
213
+ # seg_label /= 255
214
+ # seg0 = self.Tensor(seg0)
215
+ if self.cfg.num_seg_class == 3:
216
+ seg0 = self.Tensor(seg0)
217
+ seg1 = self.Tensor(seg1)
218
+ seg2 = self.Tensor(seg2)
219
+ # seg1 = self.Tensor(seg1)
220
+ # seg2 = self.Tensor(seg2)
221
+ lane1 = self.Tensor(lane1)
222
+ lane2 = self.Tensor(lane2)
223
+
224
+ # seg_label = torch.stack((seg2[0], seg1[0]),0)
225
+ if self.cfg.num_seg_class == 3:
226
+ seg_label = torch.stack((seg0[0],seg1[0],seg2[0]),0)
227
+ else:
228
+ seg_label = torch.stack((seg2[0], seg1[0]),0)
229
+
230
+ lane_label = torch.stack((lane2[0], lane1[0]),0)
231
+ # _, gt_mask = torch.max(seg_label, 0)
232
+ # _ = show_seg_result(img, gt_mask, idx, 0, save_dir='debug', is_gt=True)
233
+
234
+
235
+ target = [labels_out, seg_label, lane_label]
236
+ img = self.transform(img)
237
+
238
+ return img, target, data["image"], shapes
239
+
240
+ def select_data(self, db):
241
+ """
242
+ You can use this function to filter useless images in the dataset
243
+
244
+ Inputs:
245
+ -db: (list)database
246
+
247
+ Returns:
248
+ -db_selected: (list)filtered dataset
249
+ """
250
+ db_selected = ...
251
+ return db_selected
252
+
253
+ @staticmethod
254
+ def collate_fn(batch):
255
+ img, label, paths, shapes= zip(*batch)
256
+ label_det, label_seg, label_lane = [], [], []
257
+ for i, l in enumerate(label):
258
+ l_det, l_seg, l_lane = l
259
+ l_det[:, 0] = i # add target image index for build_targets()
260
+ label_det.append(l_det)
261
+ label_seg.append(l_seg)
262
+ label_lane.append(l_lane)
263
+ return torch.stack(img, 0), [torch.cat(label_det, 0), torch.stack(label_seg, 0), torch.stack(label_lane, 0)], paths, shapes
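A hedged sketch of how this dataset is consumed during training. BddDataset and collate_fn come from this package; the cfg import, the exact cfg fields (e.g. cfg.MODEL.IMAGE_SIZE) and the normalization constants are assumptions borrowed from typical YOLOP-style training scripts:

import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from lib.config import cfg                              # assumed config object
from lib.dataset import BddDataset, AutoDriveDataset

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_set = BddDataset(cfg, is_train=True, inputsize=cfg.MODEL.IMAGE_SIZE,
                       transform=transforms.Compose([transforms.ToTensor(), normalize]))
loader = DataLoader(train_set, batch_size=8, shuffle=True,
                    collate_fn=AutoDriveDataset.collate_fn)

imgs, (det, da_seg, ll_seg), paths, shapes = next(iter(loader))
print(imgs.shape, det.shape, da_seg.shape, ll_seg.shape)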
264
+
lib/dataset/DemoDataset.py ADDED
@@ -0,0 +1,188 @@
1
+ import glob
2
+ import os
3
+ import random
4
+ import shutil
5
+ import time
6
+ from pathlib import Path
7
+ from threading import Thread
8
+
9
+ import cv2
10
+ import math
11
+ import numpy as np
12
+ import torch
13
+ from PIL import Image, ExifTags
14
+ from torch.utils.data import Dataset
15
+ from tqdm import tqdm
16
+
17
+ from ..utils import letterbox_for_img, clean_str
18
+
19
+ img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
20
+ vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']
21
+
22
+ class LoadImages: # for inference
23
+ def __init__(self, path, img_size=640):
24
+ p = str(Path(path)) # os-agnostic
25
+ p = os.path.abspath(p) # absolute path
26
+ if '*' in p:
27
+ files = sorted(glob.glob(p, recursive=True)) # glob
28
+ elif os.path.isdir(p):
29
+ files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
30
+ elif os.path.isfile(p):
31
+ files = [p] # files
32
+ else:
33
+ raise Exception('ERROR: %s does not exist' % p)
34
+
35
+ images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
36
+ videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
37
+ ni, nv = len(images), len(videos)
38
+
39
+ self.img_size = img_size
40
+ self.files = images + videos
41
+ self.nf = ni + nv # number of files
42
+ self.video_flag = [False] * ni + [True] * nv
43
+ self.mode = 'images'
44
+ if any(videos):
45
+ self.new_video(videos[0]) # new video
46
+ else:
47
+ self.cap = None
48
+ assert self.nf > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
49
+ (p, img_formats, vid_formats)
50
+
51
+ def __iter__(self):
52
+ self.count = 0
53
+ return self
54
+
55
+ def __next__(self):
56
+ if self.count == self.nf:
57
+ raise StopIteration
58
+ path = self.files[self.count]
59
+
60
+ if self.video_flag[self.count]:
61
+ # Read video
62
+ self.mode = 'video'
63
+ ret_val, img0 = self.cap.read()
64
+ if not ret_val:
65
+ self.count += 1
66
+ self.cap.release()
67
+ if self.count == self.nf: # last video
68
+ raise StopIteration
69
+ else:
70
+ path = self.files[self.count]
71
+ self.new_video(path)
72
+ ret_val, img0 = self.cap.read()
73
+ h0, w0 = img0.shape[:2]
74
+
75
+ self.frame += 1
76
+ print('\n video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='')
77
+
78
+ else:
79
+ # Read image
80
+ self.count += 1
81
+ img0 = cv2.imread(path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) # BGR
82
+ #img0 = cv2.cvtColor(img0, cv2.COLOR_BGR2RGB)
83
+ assert img0 is not None, 'Image Not Found ' + path
84
+ print('image %g/%g %s: \n' % (self.count, self.nf, path), end='')
85
+ h0, w0 = img0.shape[:2]
86
+
87
+ # Padded resize
88
+ img, ratio, pad = letterbox_for_img(img0, new_shape=self.img_size, auto=True)
89
+ h, w = img.shape[:2]
90
+ shapes = (h0, w0), ((h / h0, w / w0), pad)
91
+
92
+ # Convert
93
+ #img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
94
+ img = np.ascontiguousarray(img)
95
+
96
+
97
+ # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
98
+ return path, img, img0, self.cap, shapes
99
+
100
+ def new_video(self, path):
101
+ self.frame = 0
102
+ self.cap = cv2.VideoCapture(path)
103
+ self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
104
+
105
+ def __len__(self):
106
+ return self.nf # number of files
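A minimal sketch of driving the demo loader; the source directory is a placeholder and the model call is only indicated in a comment:

import torch
from lib.dataset import LoadImages

dataset = LoadImages('inference/images', img_size=640)    # placeholder folder of .jpg/.png files
for path, img, img0, vid_cap, shapes in dataset:
    img = torch.from_numpy(img).permute(2, 0, 1).float() / 255.0   # HWC uint8 -> CHW float in [0, 1]
    img = img.unsqueeze(0)                                          # add a batch dimension
    # det_out, da_seg_out, ll_seg_out = model(img)                  # model inference omitted here
    print(path, tuple(img.shape), shapes)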
107
+
108
+
109
+
110
+ class LoadStreams: # multiple IP or RTSP cameras
111
+ def __init__(self, sources='streams.txt', img_size=640, auto=True):
112
+ self.mode = 'stream'
113
+ self.img_size = img_size
114
+
115
+ if os.path.isfile(sources):
116
+ with open(sources, 'r') as f:
117
+ sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
118
+ else:
119
+ sources = [sources]
120
+
121
+ n = len(sources)
122
+ self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
123
+ self.sources = [clean_str(x) for x in sources] # clean source names for later
124
+ self.auto = auto
125
+ for i, s in enumerate(sources): # index, source
126
+ # Start thread to read frames from video stream
127
+ print(f'{i + 1}/{n}: {s}... ', end='')
128
+ s = int(s) if s.isnumeric() else s # i.e. s = '0' local webcam
129
+ cap = cv2.VideoCapture(s)
130
+ assert cap.isOpened(), f'Failed to open {s}'
131
+ w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
132
+ h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
133
+ self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0 # 30 FPS fallback
134
+ self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback
135
+
136
+ _, self.imgs[i] = cap.read() # guarantee first frame
137
+ self.threads[i] = Thread(target=self.update, args=([i, cap]), daemon=True)
138
+ print(f" success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
139
+ self.threads[i].start()
140
+ print('') # newline
141
+
142
+ # check for common shapes
143
+
144
+ s = np.stack([letterbox_for_img(x, self.img_size, auto=self.auto)[0].shape for x in self.imgs], 0) # shapes
145
+ self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
146
+ if not self.rect:
147
+ print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
148
+
149
+ def update(self, i, cap):
150
+ # Read stream `i` frames in daemon thread
151
+ n, f, read = 0, self.frames[i], 1 # frame number, frame array, inference every 'read' frame
152
+ while cap.isOpened() and n < f:
153
+ n += 1
154
+ # _, self.imgs[index] = cap.read()
155
+ cap.grab()
156
+ if n % read == 0:
157
+ success, im = cap.retrieve()
158
+ self.imgs[i] = im if success else self.imgs[i] * 0
159
+ time.sleep(1 / self.fps[i]) # wait time
160
+
161
+ def __iter__(self):
162
+ self.count = -1
163
+ return self
164
+
165
+ def __next__(self):
166
+ self.count += 1
167
+ if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit
168
+ cv2.destroyAllWindows()
169
+ raise StopIteration
170
+
171
+ # Letterbox
172
+ img0 = self.imgs.copy()
173
+
174
+ h0, w0 = img0[0].shape[:2]
175
+ img, _, pad = letterbox_for_img(img0[0], self.img_size, auto=self.rect and self.auto)
176
+
177
+ # Stack
178
+ h, w = img.shape[:2]
179
+ shapes = (h0, w0), ((h / h0, w / w0), pad)
180
+
181
+ # Convert
182
+ #img = img[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW
183
+ img = np.ascontiguousarray(img)
184
+
185
+ return self.sources, img, img0[0], None, shapes
186
+
187
+ def __len__(self):
188
+ return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years
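The streaming variant works the same way; here sketched for a single local webcam (source '0'), which is of course only runnable on a machine with a camera attached:

from lib.dataset import LoadStreams

streams = LoadStreams('0', img_size=640)          # '0' = local webcam; a streams.txt with URLs also works
for sources, img, img0, _, shapes in streams:
    print(sources, img.shape, img0.shape, shapes)
    break                                         # one frame is enough for this sketch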
lib/dataset/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .bdd import BddDataset
2
+ from .AutoDriveDataset import AutoDriveDataset
3
+ from .DemoDataset import LoadImages, LoadStreams
lib/dataset/__pycache__/AutoDriveDataset.cpython-310.pyc ADDED
Binary file (6.61 kB). View file
 
lib/dataset/__pycache__/AutoDriveDataset.cpython-37.pyc ADDED
Binary file (6.59 kB). View file
 
lib/dataset/__pycache__/DemoDataset.cpython-310.pyc ADDED
Binary file (6.55 kB). View file
 
lib/dataset/__pycache__/DemoDataset.cpython-37.pyc ADDED
Binary file (6.58 kB). View file
 
lib/dataset/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (279 Bytes). View file
 
lib/dataset/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (271 Bytes). View file
 
lib/dataset/__pycache__/bdd.cpython-310.pyc ADDED
Binary file (2.73 kB). View file
 
lib/dataset/__pycache__/bdd.cpython-37.pyc ADDED
Binary file (2.69 kB). View file
 
lib/dataset/__pycache__/convert.cpython-310.pyc ADDED
Binary file (726 Bytes). View file
 
lib/dataset/__pycache__/convert.cpython-37.pyc ADDED
Binary file (712 Bytes). View file
 
lib/dataset/bdd.py ADDED
@@ -0,0 +1,85 @@
1
+ import numpy as np
2
+ import json
3
+
4
+ from .AutoDriveDataset import AutoDriveDataset
5
+ from .convert import convert, id_dict, id_dict_single
6
+ from tqdm import tqdm
7
+
8
+ single_cls = True # just detect vehicle
9
+
10
+ class BddDataset(AutoDriveDataset):
11
+ def __init__(self, cfg, is_train, inputsize, transform=None):
12
+ super().__init__(cfg, is_train, inputsize, transform)
13
+ self.db = self._get_db()
14
+ self.cfg = cfg
15
+
16
+ def _get_db(self):
17
+ """
18
+ get database from the annotation file
19
+
20
+ Inputs:
21
+
22
+ Returns:
23
+ gt_db: (list)database [a,b,c,...]
24
+ a: (dictionary){'image':, 'information':, ......}
25
+ image: image path
26
+ mask: path of the segmentation label
27
+ label: [cls_id, center_x, center_y, w, h], normalized to the original image width/height
28
+ """
29
+ print('building database...')
30
+ gt_db = []
31
+ height, width = self.shapes
32
+ for mask in tqdm(list(self.mask_list)):
33
+ mask_path = str(mask)
34
+ label_path = mask_path.replace(str(self.mask_root), str(self.label_root)).replace(".png", ".json")
35
+ image_path = mask_path.replace(str(self.mask_root), str(self.img_root)).replace(".png", ".jpg")
36
+ lane_path = mask_path.replace(str(self.mask_root), str(self.lane_root))
37
+ with open(label_path, 'r') as f:
38
+ label = json.load(f)
39
+ data = label['frames'][0]['objects']
40
+ data = self.filter_data(data)
41
+ gt = np.zeros((len(data), 5))
42
+ for idx, obj in enumerate(data):
43
+ category = obj['category']
44
+ if category == "traffic light":
45
+ color = obj['attributes']['trafficLightColor']
46
+ category = "tl_" + color
47
+ if category in id_dict.keys():
48
+ x1 = float(obj['box2d']['x1'])
49
+ y1 = float(obj['box2d']['y1'])
50
+ x2 = float(obj['box2d']['x2'])
51
+ y2 = float(obj['box2d']['y2'])
52
+ cls_id = id_dict[category]
53
+ if single_cls:
54
+ cls_id=0
55
+ gt[idx][0] = cls_id
56
+ box = convert((width, height), (x1, x2, y1, y2))
57
+ gt[idx][1:] = list(box)
58
+
59
+
60
+ rec = [{
61
+ 'image': image_path,
62
+ 'label': gt,
63
+ 'mask': mask_path,
64
+ 'lane': lane_path
65
+ }]
66
+
67
+ gt_db += rec
68
+ print('database build finished')
69
+ return gt_db
70
+
71
+ def filter_data(self, data):
72
+ remain = []
73
+ for obj in data:
74
+ if 'box2d' in obj.keys(): # obj.has_key('box2d'):
75
+ if single_cls:
76
+ if obj['category'] in id_dict_single.keys():
77
+ remain.append(obj)
78
+ else:
79
+ remain.append(obj)
80
+ return remain
81
+
82
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
83
+ """
84
+ """
85
+ pass
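To illustrate the label conversion performed in _get_db above, here is how a single (made-up) BDD100K object dictionary becomes a gt row. id_dict and convert come from lib.dataset.convert; the 1280x720 image size is the usual BDD100K resolution (cfg.DATASET.ORG_IMG_SIZE), and with single_cls = True the class id would afterwards be forced to 0:

from lib.dataset.convert import convert, id_dict

obj = {"category": "traffic light",
       "attributes": {"trafficLightColor": "green"},
       "box2d": {"x1": 630.0, "y1": 280.0, "x2": 650.0, "y2": 320.0}}   # made-up annotation

category = obj["category"]
if category == "traffic light":
    category = "tl_" + obj["attributes"]["trafficLightColor"]           # -> "tl_green"

cls_id = id_dict[category]                                              # 7 for "tl_green"
box = convert((1280, 720), (obj["box2d"]["x1"], obj["box2d"]["x2"],
                            obj["box2d"]["y1"], obj["box2d"]["y2"]))    # normalized (x, y, w, h)
print(cls_id, box)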
lib/dataset/convert.py ADDED
@@ -0,0 +1,31 @@
1
+ # bdd_labels = {
2
+ # 'unlabeled':0, 'dynamic': 1, 'ego vehicle': 2, 'ground': 3,
3
+ # 'static': 4, 'parking': 5, 'rail track': 6, 'road': 7,
4
+ # 'sidewalk': 8, 'bridge': 9, 'building': 10, 'fence': 11,
5
+ # 'garage': 12, 'guard rail': 13, 'tunnel': 14, 'wall': 15,
6
+ # 'banner': 16, 'billboard': 17, 'lane divider': 18,'parking sign': 19,
7
+ # 'pole': 20, 'polegroup': 21, 'street light': 22, 'traffic cone': 23,
8
+ # 'traffic device': 24, 'traffic light': 25, 'traffic sign': 26, 'traffic sign frame': 27,
9
+ # 'terrain': 28, 'vegetation': 29, 'sky': 30, 'person': 31,
10
+ # 'rider': 32, 'bicycle': 33, 'bus': 34, 'car': 35,
11
+ # 'caravan': 36, 'motorcycle': 37, 'trailer': 38, 'train': 39,
12
+ # 'truck': 40
13
+ # }
14
+ id_dict = {'person': 0, 'rider': 1, 'car': 2, 'bus': 3, 'truck': 4,
15
+ 'bike': 5, 'motor': 6, 'tl_green': 7, 'tl_red': 8,
16
+ 'tl_yellow': 9, 'tl_none': 10, 'traffic sign': 11, 'train': 12}
17
+ id_dict_single = {'car': 0, 'bus': 1, 'truck': 2,'train': 3}
18
+ # id_dict = {'car': 0, 'bus': 1, 'truck': 2}
19
+
20
+ def convert(size, box):
21
+ dw = 1./(size[0])
22
+ dh = 1./(size[1])
23
+ x = (box[0] + box[1])/2.0
24
+ y = (box[2] + box[3])/2.0
25
+ w = box[1] - box[0]
26
+ h = box[3] - box[2]
27
+ x = x*dw
28
+ w = w*dw
29
+ y = y*dh
30
+ h = h*dh
31
+ return (x,y,w,h)
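For example, a 200x200-pixel box centred at (200, 300) on a 1280x720 image converts as follows:

>>> convert((1280, 720), (100.0, 300.0, 200.0, 400.0))   # box given as (x1, x2, y1, y2)
(0.15625, 0.4166666666666667, 0.15625, 0.2777777777777778)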
lib/dataset/hust.py ADDED
@@ -0,0 +1,87 @@
1
+ import numpy as np
2
+ import json
3
+
4
+ from .AutoDriveDataset import AutoDriveDataset
5
+ from .convert import convert, id_dict, id_dict_single
6
+ from tqdm import tqdm
7
+ import os
8
+
9
+ single_cls = False # detect all supported classes, not just vehicles
10
+
11
+ class HustDataset(AutoDriveDataset):
12
+ def __init__(self, cfg, is_train, inputsize, transform=None):
13
+ super().__init__(cfg, is_train, inputsize, transform)
14
+ self.db = self._get_db()
15
+ self.cfg = cfg
16
+
17
+ def _get_db(self):
18
+ """
19
+ get database from the annotation file
20
+
21
+ Inputs:
22
+
23
+ Returns:
24
+ gt_db: (list)database [a,b,c,...]
25
+ a: (dictionary){'image':, 'information':, ......}
26
+ image: image path
27
+ mask: path of the segmentation label
28
+ label: [cls_id, center_x, center_y, w, h], normalized to the original image width/height
29
+ """
30
+ print('building database...')
31
+ gt_db = []
32
+ height, width = self.shapes
33
+ for mask in tqdm(list(self.mask_list)):
34
+ mask_path = str(mask)
35
+ label_path = self.label_root
36
+ # label_path = mask_path.replace(str(self.mask_root), str(self.label_root)).replace(".png", ".json")
37
+ image_path = mask_path.replace(str(self.mask_root), str(self.img_root)).replace(".png", ".jpg")
38
+ lane_path = mask_path.replace(str(self.mask_root), str(self.lane_root))
39
+ with open(label_path, 'r') as f:
40
+ label = json.load(f)
41
+ data = label[int(os.path.basename(image_path)[:-4])]["labels"]
42
+ data = self.filter_data(data)
43
+ gt = np.zeros((len(data), 5))
44
+ for idx, obj in enumerate(data):
45
+ category = obj['category']
46
+ if category == "traffic light":
47
+ color = obj['attributes']['Traffic Light Color'][0]
48
+ category = "tl_" + color
49
+ if category in id_dict.keys():
50
+ x1 = float(obj['box2d']['x1'])
51
+ y1 = float(obj['box2d']['y1'])
52
+ x2 = float(obj['box2d']['x2'])
53
+ y2 = float(obj['box2d']['y2'])
54
+ cls_id = id_dict[category]
55
+ if single_cls:
56
+ cls_id=0
57
+ gt[idx][0] = cls_id
58
+ box = convert((width, height), (x1, x2, y1, y2))
59
+ gt[idx][1:] = list(box)
60
+
61
+
62
+ rec = [{
63
+ 'image': image_path,
64
+ 'label': gt,
65
+ 'mask': mask_path,
66
+ 'lane': lane_path
67
+ }]
68
+
69
+ gt_db += rec
70
+ print('database build finished')
71
+ return gt_db
72
+
73
+ def filter_data(self, data):
74
+ remain = []
75
+ for obj in data:
76
+ if 'box2d' in obj.keys(): # obj.has_key('box2d'):
77
+ if single_cls:
78
+ if obj['category'] in id_dict_single.keys():
79
+ remain.append(obj)
80
+ else:
81
+ remain.append(obj)
82
+ return remain
83
+
84
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
85
+ """
86
+ """
87
+ pass