sft2005 committed on
Commit 9545fea · verified · 1 Parent(s): 07c776b

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50)
  1. .gitattributes +10 -0
  2. .gitignore +12 -0
  3. LICENSE +21 -0
  4. README _CH.md +319 -0
  5. README.md +347 -0
  6. export_onnx.py +200 -0
  7. hubconf.py +30 -0
  8. lib/__init__.py +0 -0
  9. lib/__pycache__/__init__.cpython-310.pyc +0 -0
  10. lib/config/__init__.py +2 -0
  11. lib/config/__pycache__/__init__.cpython-310.pyc +0 -0
  12. lib/config/__pycache__/__init__.cpython-37.pyc +0 -0
  13. lib/config/__pycache__/default.cpython-310.pyc +0 -0
  14. lib/config/__pycache__/default.cpython-37.pyc +0 -0
  15. lib/config/default.py +165 -0
  16. lib/config/yolov11.py +30 -0
  17. lib/core/__init__.py +1 -0
  18. lib/core/__pycache__/__init__.cpython-310.pyc +0 -0
  19. lib/core/__pycache__/__init__.cpython-37.pyc +0 -0
  20. lib/core/__pycache__/evaluate.cpython-310.pyc +0 -0
  21. lib/core/__pycache__/evaluate.cpython-37.pyc +0 -0
  22. lib/core/__pycache__/function.cpython-310.pyc +0 -0
  23. lib/core/__pycache__/function.cpython-37.pyc +0 -0
  24. lib/core/__pycache__/general.cpython-310.pyc +0 -0
  25. lib/core/__pycache__/general.cpython-37.pyc +0 -0
  26. lib/core/__pycache__/loss.cpython-310.pyc +0 -0
  27. lib/core/__pycache__/postprocess.cpython-310.pyc +0 -0
  28. lib/core/__pycache__/postprocess.cpython-37.pyc +0 -0
  29. lib/core/activations.py +72 -0
  30. lib/core/evaluate.py +278 -0
  31. lib/core/function.py +510 -0
  32. lib/core/general.py +466 -0
  33. lib/core/loss.py +249 -0
  34. lib/core/postprocess.py +225 -0
  35. lib/dataset/AutoDriveDataset.py +264 -0
  36. lib/dataset/DemoDataset.py +188 -0
  37. lib/dataset/__init__.py +3 -0
  38. lib/dataset/__pycache__/AutoDriveDataset.cpython-310.pyc +0 -0
  39. lib/dataset/__pycache__/AutoDriveDataset.cpython-37.pyc +0 -0
  40. lib/dataset/__pycache__/DemoDataset.cpython-310.pyc +0 -0
  41. lib/dataset/__pycache__/DemoDataset.cpython-37.pyc +0 -0
  42. lib/dataset/__pycache__/__init__.cpython-310.pyc +0 -0
  43. lib/dataset/__pycache__/__init__.cpython-37.pyc +0 -0
  44. lib/dataset/__pycache__/bdd.cpython-310.pyc +0 -0
  45. lib/dataset/__pycache__/bdd.cpython-37.pyc +0 -0
  46. lib/dataset/__pycache__/convert.cpython-310.pyc +0 -0
  47. lib/dataset/__pycache__/convert.cpython-37.pyc +0 -0
  48. lib/dataset/bdd.py +85 -0
  49. lib/dataset/convert.py +31 -0
  50. lib/dataset/hust.py +87 -0
.gitattributes CHANGED
@@ -33,3 +33,13 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ pictures/da.png filter=lfs diff=lfs merge=lfs -text
+ pictures/detect.png filter=lfs diff=lfs merge=lfs -text
+ pictures/detect_onnx.jpg filter=lfs diff=lfs merge=lfs -text
+ pictures/input1.gif filter=lfs diff=lfs merge=lfs -text
+ pictures/input2.gif filter=lfs diff=lfs merge=lfs -text
+ pictures/ll.png filter=lfs diff=lfs merge=lfs -text
+ pictures/output1.gif filter=lfs diff=lfs merge=lfs -text
+ pictures/output2.gif filter=lfs diff=lfs merge=lfs -text
+ pictures/output_onnx.jpg filter=lfs diff=lfs merge=lfs -text
+ pictures/yolop.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,12 @@
+ .DS_Store
+ __pycache__/
+ .idea/
+ .tmp/
+ .vscode/
+ bdd/
+ runs/
+ inference/
+ *.pth
+ *.pt
+ *.tar
+ *.tar.gz
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2021 Hust Visual Learning Team
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README _CH.md ADDED
@@ -0,0 +1,319 @@
1
+ <div align="left">
2
+
3
+ ## You Only :eyes: Once for Panoptic ​ :car: Perception
4
+ > [**You Only Look Once for Panoptic Driving Perception**](https://arxiv.org/abs/2108.11250)
5
+ >
6
+ > by Dong Wu, Manwen Liao, Weitian Zhang, [Xinggang Wang](https://xinggangw.info/)<sup> :email:</sup> [*School of EIC, HUST*](http://eic.hust.edu.cn/English/Home.htm)
7
+ >
8
+ > (<sup>:email:</sup>) corresponding author.
9
+ >
10
+ > *arXiv technical report ([arXiv 2108.11250](https://arxiv.org/abs/2108.11250))*
11
+
12
+ ---
13
+
14
+ [English Document](https://github.com/hustvl/YOLOP)
15
+
16
+ ### YOLOP框架
17
+
18
+ ![yolop](pictures/yolop.png)
19
+
20
+ ### 贡献
21
+
22
+ * 我们提出了一种高效的多任务网络,该网络可以联合处理自动驾驶中的目标检测、可驾驶区域分割和车道检测三个关键任务。不但节省了计算成本,减少了推理时间,还提高了各个任务的性能。我们的工作是第一个在嵌入式设备上实现实时,同时在`BDD100K`数据集上保持`SOTA`(最先进的)性能水平。
23
+
24
+ * 我们设计了消融实验来验证我们的多任务方案的有效性。证明了这三个任务不需要繁琐的交替优化就可以联合学习。
25
+
26
+ * 我们设计了消融实验证明了基于网格的检测任务的预测机制与语义分割任务的预测机制更相关,相信这将为其他相关的多任务学习研究工作提供参考。
27
+
28
+
29
+ ### 实验结果
30
+
31
+ #### 交通目标检测结果:
32
+
33
+ | Model | Recall(%) | mAP50(%) | Speed(fps) |
34
+ | -------------- | --------- | -------- | ---------- |
35
+ | `Multinet` | 81.3 | 60.2 | 8.6 |
36
+ | `DLT-Net` | 89.4 | 68.4 | 9.3 |
37
+ | `Faster R-CNN` | 77.2 | 55.6 | 5.3 |
38
+ | `YOLOv5s` | 86.8 | 77.2 | 82 |
39
+ | `YOLOP(ours)` | 89.2 | 76.5 | 41 |
40
+ #### 可行驶区域分割结果:
41
+
42
+ | Model | mIOU(%) | Speed(fps) |
43
+ | ------------- | ------- | ---------- |
44
+ | `Multinet` | 71.6 | 8.6 |
45
+ | `DLT-Net` | 71.3 | 9.3 |
46
+ | `PSPNet` | 89.6 | 11.1 |
47
+ | `YOLOP(ours)` | 91.5 | 41 |
48
+
49
+ #### 车道线检测结果:
50
+
51
+ | Model | mIOU(%) | IOU(%) |
52
+ | ------------- | ------- | ------ |
53
+ | `ENet` | 34.12 | 14.64 |
54
+ | `SCNN` | 35.79 | 15.84 |
55
+ | `ENet-SAD` | 36.56 | 16.02 |
56
+ | `YOLOP(ours)` | 70.50 | 26.20 |
57
+
58
+ #### 消融实验 1: 端对端训练 v.s. 分步训练:
59
+
60
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) |
61
+ | --------------- | --------- | ----- | ------- | ----------- | ------ |
62
+ | `ES-W` | 87.0 | 75.3 | 90.4 | 66.8 | 26.2 |
63
+ | `ED-W` | 87.3 | 76.0 | 91.6 | 71.2 | 26.1 |
64
+ | `ES-D-W` | 87.0 | 75.1 | 91.7 | 68.6 | 27.0 |
65
+ | `ED-S-W` | 87.5 | 76.1 | 91.6 | 68.0 | 26.8 |
66
+ | `End-to-end` | 89.2 | 76.5 | 91.5 | 70.5 | 26.2 |
67
+
68
+ #### 消融实验 2: 多任务学习 v.s. 单任务学习:
69
+
70
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) | Speed(ms/frame) |
71
+ | --------------- | --------- | ----- | ------- | ----------- | ------ | --------------- |
72
+ | `Det(only)` | 88.2 | 76.9 | - | - | - | 15.7 |
73
+ | `Da-Seg(only)` | - | - | 92.0 | - | - | 14.8 |
74
+ | `Ll-Seg(only)` | - | - | - | 79.6 | 27.9 | 14.8 |
75
+ | `Multitask` | 89.2 | 76.5 | 91.5 | 70.5 | 26.2 | 24.4 |
76
+
77
+ #### 消融实验 3: 基于网格 v.s. 基于区域:
78
+
79
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) | Speed(ms/frame) |
80
+ | --------------- | --------- | ----- | ------- | ----------- | ------ | --------------- |
81
+ | `R-CNNP Det(only)` | 79.0 | 67.3 | - | - | - | - |
82
+ | `R-CNNP Seg(only)` | - | - | 90.2 | 59.5 | 24.0 | - |
83
+ | `R-CNNP Multitask` | 77.2(-1.8)| 62.6(-4.7)| 86.8(-3.4)| 49.8(-9.7)| 21.5(-2.5)| 103.3 |
84
+ | `YOLOP Det(only)` | 88.2 | 76.9 | - | - | - | - |
85
+ | `YOLOP Seg(only)` | - | - | 91.6 | 69.9 | 26.5 | - |
86
+ | `YOLOP Multitask` | 89.2(+1.0)| 76.5(-0.4)| 91.5(-0.1)| 70.5(+0.6)| 26.2(-0.3)| 24.4 |
87
+
88
+ **Notes**:
89
+
90
+ - 我们工作参考了以下工作: `Multinet` ([论文](https://arxiv.org/pdf/1612.07695.pdf?utm_campaign=affiliate-ir-Optimise%20media%28%20South%20East%20Asia%29%20Pte.%20ltd._156_-99_national_R_all_ACQ_cpa_en&utm_content=&utm_source=%20388939),[代码](https://github.com/MarvinTeichmann/MultiNet)),`DLT-Net` ([论文](https://ieeexplore.ieee.org/abstract/document/8937825)),`Faster R-CNN` ([论文](https://proceedings.neurips.cc/paper/2015/file/14bfa6bb14875e45bba028a21ed38046-Paper.pdf),[代码](https://github.com/ShaoqingRen/faster_rcnn)),`YOLOv5`([代码](https://github.com/ultralytics/yolov5)) ,`PSPNet`([论文](https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf),[代码](https://github.com/hszhao/PSPNet)) ,`ENet`([论文](https://arxiv.org/pdf/1606.02147.pdf),[代码](https://github.com/osmr/imgclsmob)) `SCNN`([论文](https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/download/16802/16322),[代码](https://github.com/XingangPan/SCNN)) `SAD-ENet`([论文](https://openaccess.thecvf.com/content_ICCV_2019/papers/Hou_Learning_Lightweight_Lane_Detection_CNNs_by_Self_Attention_Distillation_ICCV_2019_paper.pdf),[代码](https://github.com/cardwing/Codes-for-Lane-Detection)). 感谢他们精彩的工作
91
+ - 在表 4中, E, D, S 和 W 分别代表 编码器(Encoder), 检测头(Detect head), 两个分割头(Segment heads)和整个网络(whole network). 所以算法 (首先,我们只训练编码器和检测头。然后我们冻结编码器和检测头只训练两个分割头。最后,整个网络进行联合训练三个任务) 可以被记作 `ED-S-W`,以此类推。
92
+
93
+ ---
94
+
95
+ ### 可视化
96
+
97
+ #### 交通目标检测结果
98
+
99
+ ![detect result](pictures/detect.png)
100
+
101
+ #### 可行驶区域分割结果
102
+
103
+ ![](pictures/da.png)
104
+
105
+ #### 车道线分割结果
106
+
107
+ ![](pictures/ll.png)
108
+
109
+ **注意点**:
110
+
111
+ - 车道线分割结果是经过曲线拟合的.
112
+
113
+ ---
114
+
115
+ ### Project Structure
116
+
117
+ ```python
118
+ ├─inference
119
+ │ ├─images # inference images
120
+ │ ├─output # inference result
121
+ ├─lib
122
+ │ ├─config/default # configuration of training and validation
123
+ │ ├─core
124
+ │ │ ├─activations.py # activation function
125
+ │ │ ├─evaluate.py # calculation of metric
126
+ │ │ ├─function.py # training and validation of model
127
+ │ │ ├─general.py #calculation of metric、nms、conversion of data-format、visualization
128
+ │ │ ├─loss.py # loss function
129
+ │ │ ├─postprocess.py # postprocess(refine da-seg and ll-seg, unrelated to paper)
130
+ │ ├─dataset
131
+ │ │ ├─AutoDriveDataset.py # Superclass dataset,general function
132
+ │ │ ├─bdd.py # Subclass dataset,specific function
133
+ │ │ ├─hust.py # Subclass dataset(Campus scene, unrelated to paper)
134
+ │ │ ├─convert.py
135
+ │ │ ├─DemoDataset.py # demo dataset(image, video and stream)
136
+ │ ├─models
137
+ │ │ ├─YOLOP.py # Setup and Configuration of model
138
+ │ │ ├─light.py # Model lightweight(unrelated to paper, zwt)
139
+ │ │ ├─commom.py # calculation module
140
+ │ ├─utils
141
+ │ │ ├─augmentations.py # data augumentation
142
+ │ │ ├─autoanchor.py # auto anchor(k-means)
143
+ │ │ ├─split_dataset.py # (Campus scene, unrelated to paper)
144
+ │ │ ├─utils.py # logging、device_select、time_measure、optimizer_select、model_save&initialize 、Distributed training
145
+ │ ├─run
146
+ │ │ ├─dataset/training time # Visualization, logging and model_save
147
+ ├─tools
148
+ │ │ ├─demo.py # demo(folder、camera)
149
+ │ │ ├─test.py
150
+ │ │ ├─train.py
151
+ ├─toolkits
152
+ │ │ ├─deploy # Deployment of model
153
+ │ │ ├─datapre # Generation of gt(mask) for drivable area segmentation task
154
+ ├─weights # Pretraining model
155
+ ```
156
+
157
+ ---
158
+
159
+ ### Requirement
160
+
161
+ 整个代码库是在 Python 3.7、PyTorch 1.7+ 和 torchvision 0.8+ 上开发的:
162
+
163
+ ```
164
+ conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.2 -c pytorch
165
+ ```
166
+
167
+ 其他依赖库的版本要求详见`requirements.txt`:
168
+
169
+ ```setup
170
+ pip install -r requirements.txt
171
+ ```
172
+
173
+ ### Data preparation
174
+
175
+ #### Download
176
+
177
+ - 从 [images](https://bdd-data.berkeley.edu/)下载图片数据集
178
+
179
+ - 从 [det_annotations](https://drive.google.com/file/d/1Ge-R8NTxG1eqd4zbryFo-1Uonuh0Nxyl/view?usp=sharing)下载检测任务的标签
180
+ - 从 [da_seg_annotations](https://drive.google.com/file/d/1xy_DhUZRHR8yrZG3OwTQAHhYTnXn7URv/view?usp=sharing)下载可行驶区域分割任务的标签
181
+ - 从 [ll_seg_annotations](https://drive.google.com/file/d/1lDNTPIQj_YLNZVkksKM25CvCHuquJ8AP/view?usp=sharing)下载车道线分割任务的标签
182
+
183
+ 我们推荐按照如下图片数据集文件结构:
184
+
185
+ ```
186
+ ├─dataset root
187
+ │ ├─images
188
+ │ │ ├─train
189
+ │ │ ├─val
190
+ │ ├─det_annotations
191
+ │ │ ├─train
192
+ │ │ ├─val
193
+ │ ├─da_seg_annotations
194
+ │ │ ├─train
195
+ │ │ ├─val
196
+ │ ├─ll_seg_annotations
197
+ │ │ ├─train
198
+ │ │ ├─val
199
+ ```
200
+
201
+ 在 `./lib/config/default.py`下更新数据集的路径配置。
202
+
203
+ ### 模型训练
204
+
205
+ 你可以在 `./lib/config/default.py`设定训练配置. (包括: 预训练模型的读取,损失函数, 数据增强,optimizer,训练预热和余弦退火,自动anchor,训练轮次epoch, batch_size)
206
+
207
+
208
+
209
+ 如果你想尝试交替优化或者单一任务学习,可以在 `./lib/config/default.py` 中将对应的配置选项修改为 `True`。(如下,所有配置默认均为 `False`,即端到端地联合训练多个任务)。
210
+
211
+ ```python
212
+ # Alternating optimization
213
+ _C.TRAIN.SEG_ONLY = False # Only train two segmentation branchs
214
+ _C.TRAIN.DET_ONLY = False # Only train detection branch
215
+ _C.TRAIN.ENC_SEG_ONLY = False # Only train encoder and two segmentation branchs
216
+ _C.TRAIN.ENC_DET_ONLY = False # Only train encoder and detection branch
217
+
218
+ # Single task
219
+ _C.TRAIN.DRIVABLE_ONLY = False # Only train da_segmentation task
220
+ _C.TRAIN.LANE_ONLY = False # Only train ll_segmentation task
221
+ _C.TRAIN.DET_ONLY = False # Only train detection task
222
+ ```
223
+
224
+ 开始训练:
225
+
226
+ ```shell
227
+ python tools/train.py
228
+ ```
229
+ 多GPU训练:
230
+ ```
231
+ python -m torch.distributed.launch --nproc_per_node=N tools/train.py # N: the number of GPUs
232
+ ```
233
+
234
+ ### 模型评测
235
+
236
+ 你可以在 `./lib/config/default.py`设定测试配置(包括: batch_size 以及 nms的阈值).
237
+
238
+ 开始评测:
239
+
240
+ ```shell
241
+ python tools/test.py --weights weights/End-to-end.pth
242
+ ```
243
+
244
+
245
+
246
+ ### Demo测试
247
+
248
+ 我们提供两种测试方案
249
+
250
+ #### 文件夹
251
+
252
+ 测试所使用的的图片存储在 `--source`下, 然后测试结果会保存在 `--save-dir`下:
253
+
254
+ ```shell
255
+ python tools/demo.py --source inference/images
256
+ ```
257
+
258
+
259
+
260
+ #### 相机实时
261
+
262
+ 如果你的计算机连接了摄像头, 你可以将 `source` 设为摄像头的序号(默认值为 0).
263
+
264
+ ```shell
265
+ python tools/demo.py --source 0
266
+ ```
267
+
268
+
269
+
270
+ #### 展示
271
+
272
+ <table>
273
+ <tr>
274
+ <th>input</th>
275
+ <th>output</th>
276
+ </tr>
277
+ <tr>
278
+ <td><img src=pictures/input1.gif /></td>
279
+ <td><img src=pictures/output1.gif/></td>
280
+ </tr>
281
+ <tr>
282
+ <td><img src=pictures/input2.gif /></td>
283
+ <td><img src=pictures/output2.gif/></td>
284
+ </tr>
285
+ </table>
286
+
287
+
288
+
289
+ ### 部署
290
+
291
+ 我们的模型可以在 `Jetson Tx2`上 连接`Zed Camera` 实时推理。我们使用 `TensorRT` 工具进行推理加速。我们在 `./toolkits/deploy`提供模型部署和推理的全部代码。
292
+
293
+
294
+
295
+ ### 分割标签生成
296
+
297
+ 你可以通过运行以下命令生成可行驶区域的Mask标签
298
+
299
+ ```shell
300
+ python toolkits/datasetpre/gen_bdd_seglabel.py
301
+ ```
302
+
303
+
304
+
305
+ ## 引用
306
+
307
+ 如果你发现我们的代码和论文对你的研究有帮助, 可以考虑给我们 star :star: 和引用 :pencil: :
308
+
309
+ ```BibTeX
310
+ @article{wu2022yolop,
311
+ title={Yolop: You only look once for panoptic driving perception},
312
+ author={Wu, Dong and Liao, Man-Wen and Zhang, Wei-Tian and Wang, Xing-Gang and Bai, Xiang and Cheng, Wen-Qing and Liu, Wen-Yu},
313
+ journal={Machine Intelligence Research},
314
+ pages={1--13},
315
+ year={2022},
316
+ publisher={Springer}
317
+ }
318
+ ```
319
+
README.md ADDED
@@ -0,0 +1,347 @@
1
+ <div align="left">
2
+
3
+ ## You Only :eyes: Once for Panoptic ​ :car: Perception
4
+ > [**You Only Look Once for Panoptic Driving Perception**](https://link.springer.com/article/10.1007/s11633-022-1339-y)
5
+ >
6
+ > by Dong Wu, Manwen Liao, Weitian Zhang, [Xinggang Wang](https://xwcv.github.io/)<sup> :email:</sup>, [Xiang Bai](https://scholar.google.com/citations?user=UeltiQ4AAAAJ&hl=zh-CN), [Wenqing Cheng](http://eic.hust.edu.cn/professor/chengwenqing/), [Wenyu Liu](http://eic.hust.edu.cn/professor/liuwenyu/) [*School of EIC, HUST*](http://eic.hust.edu.cn/English/Home.htm)
7
+ >
8
+ > (<sup>:email:</sup>) corresponding author.
9
+ >
10
+ > *Machine Intelligence Research 2022 ([paper](https://link.springer.com/article/10.1007/s11633-022-1339-y))*
11
+
12
+ ---
13
+
14
+ [中文文档](https://github.com/hustvl/YOLOP/blob/main/README%20_CH.md)
15
+
16
+ ### The Illustration of YOLOP
17
+
18
+ ![yolop](pictures/yolop.png)
19
+
20
+ ### Contributions
21
+
22
+ * We put forward an efficient multi-task network that jointly handles three crucial tasks in autonomous driving: object detection, drivable area segmentation and lane detection. It saves computational cost, reduces inference time and improves the performance of each task. Our work is the first to reach real time on embedded devices while maintaining state-of-the-art performance on the `BDD100K` dataset.
23
+
24
+ * We design ablation experiments to verify the effectiveness of our multi-task scheme, showing that the three tasks can be learned jointly without tedious alternating optimization.
25
+
26
+ * We design ablation experiments to show that the grid-based prediction mechanism of the detection task is more closely related to that of the semantic segmentation task, which we believe can serve as a reference for other multi-task learning research.
27
+
28
+ ### Results
29
+
30
+ [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/yolop-you-only-look-once-for-panoptic-driving/traffic-object-detection-on-bdd100k)](https://paperswithcode.com/sota/traffic-object-detection-on-bdd100k?p=yolop-you-only-look-once-for-panoptic-driving)
31
+ #### Traffic Object Detection Result
32
+
33
+ | Model | Recall(%) | mAP50(%) | Speed(fps) |
34
+ | -------------- | --------- | -------- | ---------- |
35
+ | `Multinet` | 81.3 | 60.2 | 8.6 |
36
+ | `DLT-Net` | 89.4 | 68.4 | 9.3 |
37
+ | `Faster R-CNN` | 81.2 | 64.9 | 8.8 |
38
+ | `YOLOv5s` | 86.8 | 77.2 | 82 |
39
+ | `YOLOP(ours)` | 89.2 | 76.5 | 41 |
40
+ #### Drivable Area Segmentation Result
41
+
42
+ | Model | mIOU(%) | Speed(fps) |
43
+ | ------------- | ------- | ---------- |
44
+ | `Multinet` | 71.6 | 8.6 |
45
+ | `DLT-Net` | 71.3 | 9.3 |
46
+ | `PSPNet` | 89.6 | 11.1 |
47
+ | `YOLOP(ours)` | 91.5 | 41 |
48
+
49
+ #### Lane Detection Result:
50
+
51
+ | Model | mIOU(%) | IOU(%) |
52
+ | ------------- | ------- | ------ |
53
+ | `ENet` | 34.12 | 14.64 |
54
+ | `SCNN` | 35.79 | 15.84 |
55
+ | `ENet-SAD` | 36.56 | 16.02 |
56
+ | `YOLOP(ours)` | 70.50 | 26.20 |
57
+
58
+ #### Ablation Studies 1: End-to-end v.s. Step-by-step:
59
+
60
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) |
61
+ | --------------- | --------- | ----- | ------- | ----------- | ------ |
62
+ | `ES-W` | 87.0 | 75.3 | 90.4 | 66.8 | 26.2 |
63
+ | `ED-W` | 87.3 | 76.0 | 91.6 | 71.2 | 26.1 |
64
+ | `ES-D-W` | 87.0 | 75.1 | 91.7 | 68.6 | 27.0 |
65
+ | `ED-S-W` | 87.5 | 76.1 | 91.6 | 68.0 | 26.8 |
66
+ | `End-to-end` | 89.2 | 76.5 | 91.5 | 70.5 | 26.2 |
67
+
68
+ #### Ablation Studies 2: Multi-task v.s. Single task:
69
+
70
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) | Speed(ms/frame) |
71
+ | --------------- | --------- | ----- | ------- | ----------- | ------ | --------------- |
72
+ | `Det(only)` | 88.2 | 76.9 | - | - | - | 15.7 |
73
+ | `Da-Seg(only)` | - | - | 92.0 | - | - | 14.8 |
74
+ | `Ll-Seg(only)` | - | - | - | 79.6 | 27.9 | 14.8 |
75
+ | `Multitask` | 89.2 | 76.5 | 91.5 | 70.5 | 26.2 | 24.4 |
76
+
77
+ #### Ablation Studies 3: Grid-based v.s. Region-based:
78
+
79
+ | Training_method | Recall(%) | AP(%) | mIoU(%) | Accuracy(%) | IoU(%) | Speed(ms/frame) |
80
+ | --------------- | --------- | ----- | ------- | ----------- | ------ | --------------- |
81
+ | `R-CNNP Det(only)` | 79.0 | 67.3 | - | - | - | - |
82
+ | `R-CNNP Seg(only)` | - | - | 90.2 | 59.5 | 24.0 | - |
83
+ | `R-CNNP Multitask` | 77.2(-1.8)| 62.6(-4.7)| 86.8(-3.4)| 49.8(-9.7)| 21.5(-2.5)| 103.3 |
84
+ | `YOLOP Det(only)` | 88.2 | 76.9 | - | - | - | - |
85
+ | `YOLOP Seg(only)` | - | - | 91.6 | 69.9 | 26.5 | - |
86
+ | `YOLOP Multitask` | 89.2(+1.0)| 76.5(-0.4)| 91.5(-0.1)| 70.5(+0.6)| 26.2(-0.3)| 24.4 |
87
+
88
+
89
+ **Notes**:
90
+
91
+ - The works we referenced include `Multinet` ([paper](https://arxiv.org/pdf/1612.07695.pdf?utm_campaign=affiliate-ir-Optimise%20media%28%20South%20East%20Asia%29%20Pte.%20ltd._156_-99_national_R_all_ACQ_cpa_en&utm_content=&utm_source=%20388939),[code](https://github.com/MarvinTeichmann/MultiNet)),`DLT-Net` ([paper](https://ieeexplore.ieee.org/abstract/document/8937825)),`Faster R-CNN` ([paper](https://proceedings.neurips.cc/paper/2015/file/14bfa6bb14875e45bba028a21ed38046-Paper.pdf),[code](https://github.com/ShaoqingRen/faster_rcnn)),`YOLOv5s`([code](https://github.com/ultralytics/yolov5)),`PSPNet`([paper](https://openaccess.thecvf.com/content_cvpr_2017/papers/Zhao_Pyramid_Scene_Parsing_CVPR_2017_paper.pdf),[code](https://github.com/hszhao/PSPNet)),`ENet`([paper](https://arxiv.org/pdf/1606.02147.pdf),[code](https://github.com/osmr/imgclsmob)),`SCNN`([paper](https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/download/16802/16322),[code](https://github.com/XingangPan/SCNN)) and `SAD-ENet`([paper](https://openaccess.thecvf.com/content_ICCV_2019/papers/Hou_Learning_Lightweight_Lane_Detection_CNNs_by_Self_Attention_Distillation_ICCV_2019_paper.pdf),[code](https://github.com/cardwing/Codes-for-Lane-Detection)). Thanks for their wonderful work.
92
+ - In Table 4, E, D, S and W refer to the Encoder, the Detect head, the two Segment heads and the whole network, respectively. So the algorithm (first, we only train the Encoder and Detect head; then we freeze the Encoder and Detect head and train the two Segmentation heads; finally, the entire network is trained jointly on all three tasks) can be marked as `ED-S-W`, and similarly for the others.
93
+
94
+ ---
95
+
96
+ ### Visualization
97
+
98
+ #### Traffic Object Detection Result
99
+
100
+ ![detect result](pictures/detect.png)
101
+
102
+ #### Drivable Area Segmentation Result
103
+
104
+ ![](pictures/da.png)
105
+
106
+ #### Lane Detection Result
107
+
108
+ ![](pictures/ll.png)
109
+
110
+ **Notes**:
111
+
112
+ - The visualization of the lane detection result has been post-processed by quadratic fitting (a minimal sketch of such a fit is shown below).
113
+
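A minimal sketch of what such a quadratic fit could look like, assuming a binary lane mask as produced by the lane-line branch; the actual post-processing lives in `lib/core/postprocess.py` and may differ in detail:

```python
import numpy as np

def fit_lane_quadratic(lane_mask: np.ndarray):
    """Fit x = a*y^2 + b*y + c through the foreground pixels of a binary lane mask."""
    ys, xs = np.nonzero(lane_mask)        # row/column indices of lane pixels
    if len(ys) < 3:
        return None                       # not enough points for a quadratic fit
    coeffs = np.polyfit(ys, xs, deg=2)    # least-squares quadratic fit
    y_fit = np.arange(ys.min(), ys.max())
    x_fit = np.polyval(coeffs, y_fit)     # smoothed lane curve
    return np.stack([x_fit, y_fit], axis=1)
```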
114
+ ---
115
+
116
+ ### Project Structure
117
+
118
+ ```python
119
+ ├─inference
120
+ │ ├─images # inference images
121
+ │ ├─output # inference result
122
+ ├─lib
123
+ │ ├─config/default # configuration of training and validation
124
+ │ ├─core
125
+ │ │ ├─activations.py # activation function
126
+ │ │ ├─evaluate.py # calculation of metric
127
+ │ │ ├─function.py # training and validation of model
128
+ │ │ ├─general.py #calculation of metric、nms、conversion of data-format、visualization
129
+ │ │ ├─loss.py # loss function
130
+ │ │ ├─postprocess.py # postprocess(refine da-seg and ll-seg, unrelated to paper)
131
+ │ ├─dataset
132
+ │ │ ├─AutoDriveDataset.py # Superclass dataset,general function
133
+ │ │ ├─bdd.py # Subclass dataset,specific function
134
+ │ │ ├─hust.py # Subclass dataset(Campus scene, unrelated to paper)
135
+ │ │ ├─convert.py
136
+ │ │ ├─DemoDataset.py # demo dataset(image, video and stream)
137
+ │ ├─models
138
+ │ │ ├─YOLOP.py # Setup and Configuration of model
139
+ │ │ ├─light.py # Model lightweight(unrelated to paper, zwt)
140
+ │ │ ├─commom.py # calculation module
141
+ │ ├─utils
142
+ │ │ ├─augmentations.py # data augumentation
143
+ │ │ ├─autoanchor.py # auto anchor(k-means)
144
+ │ │ ├─split_dataset.py # (Campus scene, unrelated to paper)
145
+ │ │ ├─utils.py # logging、device_select、time_measure、optimizer_select、model_save&initialize 、Distributed training
146
+ │ ├─run
147
+ │ │ ├─dataset/training time # Visualization, logging and model_save
148
+ ├─tools
149
+ │ │ ├─demo.py # demo(folder、camera)
150
+ │ │ ├─test.py
151
+ │ │ ├─train.py
152
+ ├─toolkits
153
+ │ │ ├─deploy # Deployment of model
154
+ │ │ ├─datapre # Generation of gt(mask) for drivable area segmentation task
155
+ ├─weights # Pretraining model
156
+ ```
157
+
158
+ ---
159
+
160
+ ### Requirement
161
+
162
+ This codebase has been developed with python version 3.7, PyTorch 1.7+ and torchvision 0.8+:
163
+
164
+ ```
165
+ conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.2 -c pytorch
166
+ ```
167
+
168
+ See `requirements.txt` for additional dependencies and version requirements.
169
+
170
+ ```setup
171
+ pip install -r requirements.txt
172
+ ```
173
+
174
+ ### Data preparation
175
+
176
+ #### Download
177
+
178
+ - Download the images from [images](https://bdd-data.berkeley.edu/).
179
+
180
+ - Download the annotations of detection from [det_annotations](https://drive.google.com/file/d/1Ge-R8NTxG1eqd4zbryFo-1Uonuh0Nxyl/view?usp=sharing).
181
+ - Download the annotations of drivable area segmentation from [da_seg_annotations](https://drive.google.com/file/d/1xy_DhUZRHR8yrZG3OwTQAHhYTnXn7URv/view?usp=sharing).
182
+ - Download the annotations of lane line segmentation from [ll_seg_annotations](https://drive.google.com/file/d/1lDNTPIQj_YLNZVkksKM25CvCHuquJ8AP/view?usp=sharing).
183
+
184
+ We recommend the dataset directory structure to be the following:
185
+
186
+ ```
187
+ # Matching file names (ids) establish the correspondence between images and annotations
188
+ ├─dataset root
189
+ │ ├─images
190
+ │ │ ├─train
191
+ │ │ ├─val
192
+ │ ├─det_annotations
193
+ │ │ ├─train
194
+ │ │ ├─val
195
+ │ ├─da_seg_annotations
196
+ │ │ ├─train
197
+ │ │ ├─val
198
+ │ ├─ll_seg_annotations
199
+ │ │ ├─train
200
+ │ │ ├─val
201
+ ```
202
+
203
+ Update your dataset paths in `./lib/config/default.py`.
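For reference, the keys to edit are in the `DATASET` section of `./lib/config/default.py`; the paths below are placeholders for your own dataset root:

```python
# ./lib/config/default.py  (example paths -- point them at your own dataset root)
_C.DATASET.DATAROOT = '/data/bdd100k/images'              # images folder
_C.DATASET.LABELROOT = '/data/bdd100k/det_annotations'    # detection labels
_C.DATASET.MASKROOT = '/data/bdd100k/da_seg_annotations'  # drivable area masks
_C.DATASET.LANEROOT = '/data/bdd100k/ll_seg_annotations'  # lane line masks
```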
204
+
205
+ ### Training
206
+
207
+ You can set the training configuration in `./lib/config/default.py` (including: loading of the pretrained model, loss function, data augmentation, optimizer, warm-up and cosine annealing, auto-anchor, training epochs, batch_size).
208
+
209
+ If you want to try alternating optimization or to train the model on a single task, set the corresponding configuration in `./lib/config/default.py` to `True`. (As shown below, all configurations default to `False`, which means the three tasks are trained end to end.)
210
+
211
+ ```python
212
+ # Alternating optimization
213
+ _C.TRAIN.SEG_ONLY = False # Only train two segmentation branchs
214
+ _C.TRAIN.DET_ONLY = False # Only train detection branch
215
+ _C.TRAIN.ENC_SEG_ONLY = False # Only train encoder and two segmentation branchs
216
+ _C.TRAIN.ENC_DET_ONLY = False # Only train encoder and detection branch
217
+
218
+ # Single task
219
+ _C.TRAIN.DRIVABLE_ONLY = False # Only train da_segmentation task
220
+ _C.TRAIN.LANE_ONLY = False # Only train ll_segmentation task
221
+ _C.TRAIN.DET_ONLY = False # Only train detection task
222
+ ```
223
+
224
+ Start training:
225
+
226
+ ```shell
227
+ python tools/train.py
228
+ ```
229
+ Multi GPU mode:
230
+ ```shell
231
+ python -m torch.distributed.launch --nproc_per_node=N tools/train.py # N: the number of GPUs
232
+ ```
233
+
234
+
235
+ ### Evaluation
236
+
237
+ You can set the evaluation configuration in the `./lib/config/default.py`. (Including: batch_size and threshold value for nms).
238
+
239
+ Start evaluating:
240
+
241
+ ```shell
242
+ python tools/test.py --weights weights/End-to-end.pth
243
+ ```
244
+
245
+
246
+
247
+ ### Demo Test
248
+
249
+ We provide two testing methods.
250
+
251
+ #### Folder
252
+
253
+ Store the images or videos in the folder given by `--source`; the inference results will be saved to `--save-dir`:
254
+
255
+ ```shell
256
+ python tools/demo.py --source inference/images
257
+ ```
258
+
259
+
260
+
261
+ #### Camera
262
+
263
+ If a camera is connected to your computer, you can set `--source` to the camera index (the default is 0).
264
+
265
+ ```shell
266
+ python tools/demo.py --source 0
267
+ ```
268
+
269
+
270
+
271
+ #### Demonstration
272
+
273
+ <table>
274
+ <tr>
275
+ <th>input</th>
276
+ <th>output</th>
277
+ </tr>
278
+ <tr>
279
+ <td><img src=pictures/input1.gif /></td>
280
+ <td><img src=pictures/output1.gif/></td>
281
+ </tr>
282
+ <tr>
283
+ <td><img src=pictures/input2.gif /></td>
284
+ <td><img src=pictures/output2.gif/></td>
285
+ </tr>
286
+ </table>
287
+
288
+
289
+
290
+ ### Deployment
291
+
292
+ Our model runs inference in real time on a `Jetson TX2`, with a `Zed Camera` capturing the images. We use `TensorRT` for acceleration. The full deployment and inference code is provided in `./toolkits/deploy`.
293
+
294
+
295
+
296
+ ### Segmentation Label(Mask) Generation
297
+
298
+ You can generate the labels (masks) for the drivable area segmentation task by running:
299
+
300
+ ```shell
301
+ python toolkits/datasetpre/gen_bdd_seglabel.py
302
+ ```
303
+
304
+
305
+
306
+ #### Model Transfer
307
+
308
+ Before running inference with the TensorRT C++ API, you need to convert the `.pth` file into a binary file that can be read by C++.
309
+
310
+ ```shell
311
+ python toolkits/deploy/gen_wts.py
312
+ ```
313
+
314
+ After running the above command, you obtain a binary file named `yolop.wts`.
315
+
316
+
317
+
318
+ #### Running Inference
319
+
320
+ TensorRT needs an engine file for inference. Building an engine is time-consuming, so it is convenient to save the engine file and reuse it every time you run inference. This process is integrated in `main.cpp`, which decides whether to build a new engine based on whether an engine file already exists.
321
+
322
+
323
+
324
+ ### Third-Party Resources
325
+ * YOLOP OpenCV-DNN C++ Demo: [YOLOP-opencv-dnn](https://github.com/hpc203/YOLOP-opencv-dnn) from [hpc203](https://github.com/hpc203)
326
+ * YOLOP ONNXRuntime C++ Demo: [lite.ai.toolkit](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ort/cv/yolop.cpp) from [DefTruth](https://github.com/DefTruth)
327
+ * YOLOP NCNN C++ Demo: [YOLOP-NCNN](https://github.com/EdVince/YOLOP-NCNN) from [EdVince](https://github.com/EdVince)
328
+ * YOLOP MNN C++ Demo: [YOLOP-MNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/mnn/cv/mnn_yolop.cpp) from [DefTruth](https://github.com/DefTruth)
329
+ * YOLOP TNN C++ Demo: [YOLOP-TNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/tnn/cv/tnn_yolop.cpp) from [DefTruth](https://github.com/DefTruth)
330
+
331
+
332
+
333
+ ## Citation
334
+
335
+ If you find our paper and code useful for your research, please consider giving a star :star: and citation :pencil: :
336
+
337
+ ```BibTeX
338
+ @article{wu2022yolop,
339
+ title={Yolop: You only look once for panoptic driving perception},
340
+ author={Wu, Dong and Liao, Man-Wen and Zhang, Wei-Tian and Wang, Xing-Gang and Bai, Xiang and Cheng, Wen-Qing and Liu, Wen-Yu},
341
+ journal={Machine Intelligence Research},
342
+ pages={1--13},
343
+ year={2022},
344
+ publisher={Springer}
345
+ }
346
+ ```
347
+
export_onnx.py ADDED
@@ -0,0 +1,200 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from lib.models.common import Conv, SPP, Bottleneck, BottleneckCSP, Focus, Concat, Detect, SharpenConv
4
+ from torch.nn import Upsample
5
+ from lib.utils import check_anchor_order
6
+ from lib.utils import initialize_weights
7
+ import argparse
8
+ import onnx
9
+ import onnxruntime as ort
10
+ import onnxsim
11
+
12
+ import math
13
+ import cv2
14
+
15
+ # The lane line and drivable area segmentation branches do not share information with each other and are not linked
16
+ YOLOP = [
17
+ [24, 33, 42], # Det_out_idx, Da_Segout_idx, LL_Segout_idx
18
+ [-1, Focus, [3, 32, 3]], # 0
19
+ [-1, Conv, [32, 64, 3, 2]], # 1
20
+ [-1, BottleneckCSP, [64, 64, 1]], # 2
21
+ [-1, Conv, [64, 128, 3, 2]], # 3
22
+ [-1, BottleneckCSP, [128, 128, 3]], # 4
23
+ [-1, Conv, [128, 256, 3, 2]], # 5
24
+ [-1, BottleneckCSP, [256, 256, 3]], # 6
25
+ [-1, Conv, [256, 512, 3, 2]], # 7
26
+ [-1, SPP, [512, 512, [5, 9, 13]]], # 8 SPP
27
+ [-1, BottleneckCSP, [512, 512, 1, False]], # 9
28
+ [-1, Conv, [512, 256, 1, 1]], # 10
29
+ [-1, Upsample, [None, 2, 'nearest']], # 11
30
+ [[-1, 6], Concat, [1]], # 12
31
+ [-1, BottleneckCSP, [512, 256, 1, False]], # 13
32
+ [-1, Conv, [256, 128, 1, 1]], # 14
33
+ [-1, Upsample, [None, 2, 'nearest']], # 15
34
+ [[-1, 4], Concat, [1]], # 16 #Encoder
35
+
36
+ [-1, BottleneckCSP, [256, 128, 1, False]], # 17
37
+ [-1, Conv, [128, 128, 3, 2]], # 18
38
+ [[-1, 14], Concat, [1]], # 19
39
+ [-1, BottleneckCSP, [256, 256, 1, False]], # 20
40
+ [-1, Conv, [256, 256, 3, 2]], # 21
41
+ [[-1, 10], Concat, [1]], # 22
42
+ [-1, BottleneckCSP, [512, 512, 1, False]], # 23
43
+ [[17, 20, 23], Detect,
44
+ [1, [[3, 9, 5, 11, 4, 20], [7, 18, 6, 39, 12, 31], [19, 50, 38, 81, 68, 157]], [128, 256, 512]]],
45
+ # Detection head 24: from_(features from specific layers), block, nc(num_classes) anchors ch(channels)
46
+
47
+ [16, Conv, [256, 128, 3, 1]], # 25
48
+ [-1, Upsample, [None, 2, 'nearest']], # 26
49
+ [-1, BottleneckCSP, [128, 64, 1, False]], # 27
50
+ [-1, Conv, [64, 32, 3, 1]], # 28
51
+ [-1, Upsample, [None, 2, 'nearest']], # 29
52
+ [-1, Conv, [32, 16, 3, 1]], # 30
53
+ [-1, BottleneckCSP, [16, 8, 1, False]], # 31
54
+ [-1, Upsample, [None, 2, 'nearest']], # 32
55
+ [-1, Conv, [8, 2, 3, 1]], # 33 Driving area segmentation head
56
+
57
+ [16, Conv, [256, 128, 3, 1]], # 34
58
+ [-1, Upsample, [None, 2, 'nearest']], # 35
59
+ [-1, BottleneckCSP, [128, 64, 1, False]], # 36
60
+ [-1, Conv, [64, 32, 3, 1]], # 37
61
+ [-1, Upsample, [None, 2, 'nearest']], # 38
62
+ [-1, Conv, [32, 16, 3, 1]], # 39
63
+ [-1, BottleneckCSP, [16, 8, 1, False]], # 40
64
+ [-1, Upsample, [None, 2, 'nearest']], # 41
65
+ [-1, Conv, [8, 2, 3, 1]] # 42 Lane line segmentation head
66
+ ]
67
+
68
+
69
+ class MCnet(nn.Module):
70
+ def __init__(self, block_cfg):
71
+ super(MCnet, self).__init__()
72
+ layers, save = [], []
73
+ self.nc = 1 # traffic or not
74
+ self.detector_index = -1
75
+ self.det_out_idx = block_cfg[0][0]
76
+ self.seg_out_idx = block_cfg[0][1:]
77
+ self.num_anchors = 3
78
+ self.num_outchannel = 5 + self.nc # dx,dy,dw,dh,obj_conf+cls_conf
79
+ # Build model
80
+ for i, (from_, block, args) in enumerate(block_cfg[1:]):
81
+ block = eval(block) if isinstance(block, str) else block # eval strings
82
+ if block is Detect:
83
+ self.detector_index = i
84
+ block_ = block(*args)
85
+ block_.index, block_.from_ = i, from_
86
+ layers.append(block_)
87
+ save.extend(x % i for x in ([from_] if isinstance(from_, int) else from_) if x != -1) # append to savelist
88
+ assert self.detector_index == block_cfg[0][0]
89
+
90
+ self.model, self.save = nn.Sequential(*layers), sorted(save)
91
+ self.names = [str(i) for i in range(self.nc)]
92
+
93
+ # set stride、anchor for detector
94
+ Detector = self.model[self.detector_index] # detector
95
+ if isinstance(Detector, Detect):
96
+ s = 128 # 2x min stride
97
+ # for x in self.forward(torch.zeros(1, 3, s, s)):
98
+ # print (x.shape)
99
+ with torch.no_grad():
100
+ model_out = self.forward(torch.zeros(1, 3, s, s))
101
+ detects, _, _ = model_out
102
+ Detector.stride = torch.tensor([s / x.shape[-2] for x in detects]) # forward
103
+ # print("stride"+str(Detector.stride ))
104
+ Detector.anchors /= Detector.stride.view(-1, 1, 1) # Set the anchors for the corresponding scale
105
+ check_anchor_order(Detector)
106
+ self.stride = Detector.stride
107
+ # self._initialize_biases()
108
+ initialize_weights(self)
109
+
110
+ def forward(self, x):
111
+ cache = []
112
+ out = []
113
+ det_out = None
114
+ for i, block in enumerate(self.model):
115
+ if block.from_ != -1:
116
+ x = cache[block.from_] if isinstance(block.from_, int) \
117
+ else [x if j == -1 else cache[j] for j in
118
+ block.from_] # calculate concat detect
119
+ x = block(x)
120
+ if i in self.seg_out_idx: # save driving area segment result
121
+ # m = nn.Sigmoid()
122
+ # out.append(m(x))
123
+ out.append(torch.sigmoid(x))
124
+ if i == self.detector_index:
125
+ # det_out = x
126
+ if self.training:
127
+ det_out = x
128
+ else:
129
+ det_out = x[0] # (torch.cat(z, 1), input_feat) if test
130
+ cache.append(x if block.index in self.save else None)
131
+ return det_out, out[0], out[1] # det, da, ll
132
+ # (1,na*ny*nx*nl,no=2+2+1+nc=xy+wh+obj_conf+cls_prob), (1,2,h,w) (1,2,h,w)
133
+
134
+ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
135
+ # https://arxiv.org/abs/1708.02002 section 3.3
136
+ # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
137
+ # m = self.model[-1] # Detect() module
138
+ m = self.model[self.detector_index] # Detect() module
139
+ for mi, s in zip(m.m, m.stride): # from
140
+ b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
141
+ b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
142
+ b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
143
+ mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
144
+
145
+
146
+ if __name__ == "__main__":
147
+ parser = argparse.ArgumentParser()
148
+ parser.add_argument('--height', type=int, default=640) # height
149
+ parser.add_argument('--width', type=int, default=640) # width
150
+ args = parser.parse_args()
151
+
152
+ do_simplify = True
153
+
154
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
155
+ model = MCnet(YOLOP)
156
+ checkpoint = torch.load('./weights/End-to-end.pth', map_location=device)
157
+ model.load_state_dict(checkpoint['state_dict'])
158
+ model.eval()
159
+
160
+ height = args.height
161
+ width = args.width
162
+ print("Load ./weights/End-to-end.pth done!")
163
+ onnx_path = f'./weights/yolop-{height}-{width}.onnx'
164
+ inputs = torch.randn(1, 3, height, width)
165
+
166
+ print(f"Converting to {onnx_path}")
167
+ torch.onnx.export(model, inputs, onnx_path,
168
+ verbose=False, opset_version=12, input_names=['images'],
169
+ output_names=['det_out', 'drive_area_seg', 'lane_line_seg'])
170
+ print('convert', onnx_path, 'to onnx finished!')
171
+ # Checks
172
+ model_onnx = onnx.load(onnx_path) # load onnx model
173
+ onnx.checker.check_model(model_onnx) # check onnx model
174
+ print(onnx.helper.printable_graph(model_onnx.graph)) # print
175
+
176
+ if do_simplify:
177
+ print(f'simplifying with onnx-simplifier {onnxsim.__version__}...')
178
+ model_onnx, check = onnxsim.simplify(model_onnx, check_n=3)
179
+ assert check, 'assert check failed'
180
+ onnx.save(model_onnx, onnx_path)
181
+
182
+ x = inputs.cpu().numpy()
183
+ try:
184
+ sess = ort.InferenceSession(onnx_path)
185
+
186
+ for ii in sess.get_inputs():
187
+ print("Input: ", ii)
188
+ for oo in sess.get_outputs():
189
+ print("Output: ", oo)
190
+
191
+ print('read onnx using onnxruntime success')
192
+ except Exception as e:
193
+ print('read failed')
194
+ raise e
195
+
196
+ """
197
+ PYTHONPATH=. python3 ./export_onnx.py --height 640 --width 640
198
+ PYTHONPATH=. python3 ./export_onnx.py --height 1280 --width 1280
199
+ PYTHONPATH=. python3 ./export_onnx.py --height 320 --width 320
200
+ """
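A minimal sketch of running the exported model with `onnxruntime`, assuming the 640x640 export above; the image path is hypothetical, and the preprocessing shown (resize plus scaling to [0, 1]) is an assumption that may not exactly match the normalization used during training:

```python
import cv2
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('./weights/yolop-640-640.onnx')

img = cv2.imread('inference/images/example.jpg')   # hypothetical test image
img = cv2.resize(img, (640, 640))
blob = img[:, :, ::-1].transpose(2, 0, 1).astype(np.float32) / 255.0  # BGR->RGB, HWC->CHW, [0,1]
blob = blob[None]                                   # add batch dimension -> (1, 3, 640, 640)

det_out, da_seg, ll_seg = sess.run(
    ['det_out', 'drive_area_seg', 'lane_line_seg'], # output names set in export_onnx.py
    {'images': blob})                               # input name set in export_onnx.py
print(det_out.shape, da_seg.shape, ll_seg.shape)
```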
hubconf.py ADDED
@@ -0,0 +1,30 @@
1
+ # YOLOP by hustvl, MIT License
2
+ dependencies = ['torch']
3
+ import torch
4
+ from lib.utils.utils import select_device
5
+ from lib.config import cfg
6
+ from lib.models import get_net
7
+ from pathlib import Path
8
+ import os
9
+
10
+ def yolop(pretrained=True, device="cpu"):
11
+ """Creates YOLOP model
12
+ Arguments:
13
+ pretrained (bool): load pretrained weights into the model
14
+ weights (str): the url of the pretrained weights
15
+ device (str): cuda device i.e. 0 or 0,1,2,3 or cpu
16
+ Returns:
17
+ YOLOP pytorch model
18
+ """
19
+ device = select_device(device = device)
20
+ model = get_net(cfg)
21
+ if pretrained:
22
+ path = os.path.join(Path(__file__).resolve().parent, "weights/End-to-end.pth")
23
+ checkpoint = torch.load(path, map_location= device)
24
+ model.load_state_dict(checkpoint['state_dict'])
25
+ model = model.to(device)
26
+ return model
27
+
28
+
29
+
30
+
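With this `hubconf.py`, the model can also be loaded through `torch.hub`. A minimal sketch, assuming the repository is published as `hustvl/yolop` on GitHub and the `weights/End-to-end.pth` checkpoint is available:

```python
import torch

# Fetch the repo and build the pretrained model via the yolop() entry point above.
model = torch.hub.load('hustvl/yolop', 'yolop', pretrained=True, device='cpu')
model.eval()

img = torch.randn(1, 3, 640, 640)                 # dummy RGB input
with torch.no_grad():
    det_out, da_seg_out, ll_seg_out = model(img)  # detection, drivable-area seg, lane-line seg
```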
lib/__init__.py ADDED
File without changes
lib/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (126 Bytes). View file
 
lib/config/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from .default import _C as cfg
2
+ from .default import update_config
lib/config/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (208 Bytes). View file
 
lib/config/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (200 Bytes). View file
 
lib/config/__pycache__/default.cpython-310.pyc ADDED
Binary file (2.83 kB). View file
 
lib/config/__pycache__/default.cpython-37.pyc ADDED
Binary file (2.69 kB). View file
 
lib/config/default.py ADDED
@@ -0,0 +1,165 @@
1
+ import os
2
+ import platform
3
+ from yacs.config import CfgNode as CN
4
+
5
+
6
+ _C = CN()
7
+
8
+ _C.LOG_DIR = 'runs/'
9
+ _C.GPUS = (0,)  # use only the first GPU
10
+ # Auto-detect optimal worker count based on OS and CPU cores
11
+ # Windows: use 0 to avoid multiprocessing issues (most stable)
12
+ # Linux: use 8 workers for better performance (8-16 recommended on an RTX 4090D)
13
+ _C.WORKERS = 0 if platform.system() == 'Windows' else 8
14
+ _C.PIN_MEMORY = False # set to True to speed up host-to-GPU data transfer (e.g. on an RTX 4090D)
15
+ _C.PRINT_FREQ = 20
16
+ _C.AUTO_RESUME = False # Resume from the last training interrupt
17
+ _C.NEED_AUTOANCHOR = False # Re-select the prior anchors (k-means). When training from scratch (epoch=0), set it to true!
18
+ _C.DEBUG = False
19
+ _C.num_seg_class = 2
20
+
21
+ # Cudnn related params
22
+ _C.CUDNN = CN()
23
+ _C.CUDNN.BENCHMARK = True
24
+ _C.CUDNN.DETERMINISTIC = False
25
+ _C.CUDNN.ENABLED = True
26
+
27
+
28
+ # common params for NETWORK
29
+ _C.MODEL = CN(new_allowed=True)
30
+ _C.MODEL.NAME = 'yolop_yolov11_small'
31
+ _C.MODEL.STRU_WITHSHARE = False #add share_block to segbranch
32
+ _C.MODEL.HEADS_NAME = ['']
33
+ _C.MODEL.PRETRAINED = ""
34
+ _C.MODEL.PRETRAINED_DET = "" # deprecated
35
+ _C.MODEL.IMAGE_SIZE = [640, 640] # width * height, ex: 192 * 256
36
+ _C.MODEL.EXTRA = CN(new_allowed=True)
37
+
38
+
39
+ # loss params
40
+ _C.LOSS = CN(new_allowed=True)
41
+ _C.LOSS.LOSS_NAME = ''
42
+ # YOLOv11 Multi-head lambda weights: [cls, obj(unused), box, da_seg, ll_seg, ll_iou]
43
+ # Extra weighting coefficients applied on top of each GAIN; final loss = raw loss x GAIN x LAMBDA
44
+ _C.LOSS.MULTI_HEAD_LAMBDA = [1.0, 1.0, 1.0, 0.5, 0.5, 0.8] # down-weight the segmentation tasks
45
+ _C.LOSS.FL_GAMMA = 0.0 # focal loss gamma
46
+ _C.LOSS.CLS_POS_WEIGHT = 1.0 # classification loss positive weights
47
+ _C.LOSS.OBJ_POS_WEIGHT = 1.0 # object loss positive weights (YOLOv5 only)
48
+ _C.LOSS.SEG_POS_WEIGHT = 1.0 # segmentation loss positive weights
49
+ _C.LOSS.BOX_GAIN = 7.5 # box loss gain (YOLOv11 default, suited to the small-magnitude box loss)
50
+ _C.LOSS.CLS_GAIN = 0.5 # classification loss gain
51
+ _C.LOSS.OBJ_GAIN = 1.0 # object loss gain (YOLOv5 only, unused in YOLOv11)
52
+ _C.LOSS.DA_SEG_GAIN = 0.2 # driving area seg loss gain (lowered from 2.0; the seg loss is large in magnitude and needs a small weight)
53
+ _C.LOSS.LL_SEG_GAIN = 0.2 # lane line seg loss gain (lowered from 2.0)
54
+ _C.LOSS.LL_IOU_GAIN = 0.2 # lane line IoU loss gain (lowered from 2.0; the IoU loss is moderate in magnitude)
55
+
56
+
57
+ # DATASET related params
58
+ _C.DATASET = CN(new_allowed=True)
59
+ _C.DATASET.DATAROOT = 'E:\\minimake\\100k' # the path of images folder
60
+ _C.DATASET.LABELROOT = 'E:/minimake/det_annotations/zwt/bdd/bdd100k/labels/det_annotations' # the path of det_annotations folder label
61
+ _C.DATASET.MASKROOT = 'E:/minimake/da_seg_annotations' # the path of da_seg_annotations folder mask
62
+ _C.DATASET.LANEROOT = 'E:\\minimake\\ll_seg_annotations' # the path of ll_seg_annotations folder lane
63
+ _C.DATASET.DATASET = 'BddDataset'
64
+ _C.DATASET.TRAIN_SET = 'train'
65
+ _C.DATASET.TEST_SET = 'val'
66
+ _C.DATASET.DATA_FORMAT = 'jpg'
67
+ _C.DATASET.SELECT_DATA = False
68
+ _C.DATASET.ORG_IMG_SIZE = [720, 1280]
69
+
70
+ # training data augmentation
71
+ _C.DATASET.FLIP = True
72
+ _C.DATASET.SCALE_FACTOR = 0.5 # 0.25
73
+ _C.DATASET.ROT_FACTOR = 15 # 10
74
+ _C.DATASET.TRANSLATE = 0.1
75
+ _C.DATASET.SHEAR = 0.0
76
+ _C.DATASET.COLOR_RGB = False
77
+ _C.DATASET.HSV_H = 0.015 # image HSV-Hue augmentation (fraction)
78
+ _C.DATASET.HSV_S = 0.7 # image HSV-Saturation augmentation (fraction)
79
+ _C.DATASET.HSV_V = 0.4 # image HSV-Value augmentation (fraction)
80
+ # TODO: more augmet params to add
81
+
82
+
83
+ # train
84
+ _C.TRAIN = CN(new_allowed=True)
85
+ # A large batch size (e.g. 64) needs a larger learning rate, scaled proportionally: lr = base_lr * (batch_size / base_batch)
86
+ # base: lr=0.001, batch=4 → batch=64: lr = 0.001 * (64/4) = 0.016
87
+ _C.TRAIN.LR0 = 0.001 # initial learning rate (scale it up as above when using a larger batch size)
88
+ _C.TRAIN.LRF = 0.01 # final OneCycleLR learning rate (lr0 * lrf) - YOLOv11 uses 0.01
89
+ _C.TRAIN.WARMUP_EPOCHS = 5.0 # with a large batch, increase warmup to 5 epochs
90
+ _C.TRAIN.WARMUP_BIASE_LR = 0.1
91
+ _C.TRAIN.WARMUP_MOMENTUM = 0.8
92
+
93
+ _C.TRAIN.OPTIMIZER = 'adam'
94
+ _C.TRAIN.MOMENTUM = 0.937
95
+ _C.TRAIN.WD = 0.0005
96
+ _C.TRAIN.NESTEROV = True
97
+ _C.TRAIN.GAMMA1 = 0.99
98
+ _C.TRAIN.GAMMA2 = 0.0
99
+
100
+ _C.TRAIN.BEGIN_EPOCH = 0
101
+ _C.TRAIN.END_EPOCH = 200 # YOLOv11 recommends 200+ epochs
102
+
103
+ _C.TRAIN.VAL_FREQ = 1
104
+ _C.TRAIN.BATCH_SIZE_PER_GPU = 2 # an RTX 4090D (24 GB) can support batch_size=64
105
+ _C.TRAIN.SHUFFLE = True
106
+
107
+ _C.TRAIN.IOU_THRESHOLD = 0.2
108
+ _C.TRAIN.ANCHOR_THRESHOLD = 4.0
109
+
110
+ # if training 3 tasks end-to-end, set all parameters as True
111
+ # Alternating optimization
112
+ _C.TRAIN.SEG_ONLY = False # Only train two segmentation branchs
113
+ _C.TRAIN.DET_ONLY = False # Only train detection branch
114
+ _C.TRAIN.ENC_SEG_ONLY = False # Only train encoder and two segmentation branchs
115
+ _C.TRAIN.ENC_DET_ONLY = False # Only train encoder and detection branch
116
+
117
+ # Single task
118
+ _C.TRAIN.DRIVABLE_ONLY = False # Only train da_segmentation task
119
+ _C.TRAIN.LANE_ONLY = False # Only train ll_segmentation task
120
+ _C.TRAIN.DET_ONLY = False # Only train detection task
121
+
122
+
123
+
124
+
125
+ _C.TRAIN.PLOT = True #
126
+
127
+ # testing
128
+ _C.TEST = CN(new_allowed=True)
129
+ _C.TEST.BATCH_SIZE_PER_GPU = 8
130
+ _C.TEST.MODEL_FILE = ''
131
+ _C.TEST.SAVE_JSON = False
132
+ _C.TEST.SAVE_TXT = False
133
+ _C.TEST.PLOTS = True
134
+ _C.TEST.NMS_CONF_THRESHOLD = 0.001
135
+ _C.TEST.NMS_IOU_THRESHOLD = 0.6
136
+
137
+
138
+ def update_config(cfg, args):
139
+ cfg.defrost()
140
+ # cfg.merge_from_file(args.cfg)
141
+
142
+ if args.modelDir:
143
+ cfg.OUTPUT_DIR = args.modelDir
144
+
145
+ if args.logDir:
146
+ cfg.LOG_DIR = args.logDir
147
+
148
+ # if args.conf_thres:
149
+ # cfg.TEST.NMS_CONF_THRESHOLD = args.conf_thres
150
+
151
+ # if args.iou_thres:
152
+ # cfg.TEST.NMS_IOU_THRESHOLD = args.iou_thres
153
+
154
+
155
+
156
+ # cfg.MODEL.PRETRAINED = os.path.join(
157
+ # cfg.DATA_DIR, cfg.MODEL.PRETRAINED
158
+ # )
159
+ #
160
+ # if cfg.TEST.MODEL_FILE:
161
+ # cfg.TEST.MODEL_FILE = os.path.join(
162
+ # cfg.DATA_DIR, cfg.TEST.MODEL_FILE
163
+ # )
164
+
165
+ cfg.freeze()
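A minimal sketch of how this configuration is typically consumed; the argument names follow `update_config` above, while the real `tools/train.py` defines more options:

```python
import argparse
from lib.config import cfg, update_config   # exported by lib/config/__init__.py

parser = argparse.ArgumentParser()
parser.add_argument('--modelDir', type=str, default='')
parser.add_argument('--logDir', type=str, default='runs/')
args = parser.parse_args([])                 # use defaults here; a real script parses sys.argv

update_config(cfg, args)                     # applies the overrides above and freezes cfg
print(cfg.MODEL.IMAGE_SIZE, cfg.TRAIN.BATCH_SIZE_PER_GPU, cfg.LOG_DIR)
```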
lib/config/yolov11.py ADDED
@@ -0,0 +1,30 @@
1
+ """
2
+ YOLOv11 backbone configuration file
3
+ Configuration for a YOLOP model that uses YOLOv11 as the backbone
4
+ """
5
+ import os
6
+ import platform
7
+ from .default import _C as cfg
8
+
9
+ # Override the default config to use YOLOv11
10
+ cfg.MODEL.USE_YOLOV11 = True
11
+ cfg.MODEL.YOLOV11_SCALE = 's' # 'n', 's', 'm', 'l', 'x'
12
+ cfg.MODEL.YOLOV11_WEIGHTS = '' # 'weights/yolo11s.pt'
13
+ cfg.MODEL.FREEZE_BACKBONE = False # whether to freeze the backbone
14
+
15
+ # Training configuration
16
+ cfg.TRAIN.BATCH_SIZE_PER_GPU = 8 # YOLOv11s can use a larger batch size
17
+ cfg.TRAIN.END_EPOCH = 200 # YOLOv11 recommends 200+ epochs
18
+
19
+ # Learning-rate configuration (a larger learning rate can be used when the backbone is frozen)
20
+ cfg.TRAIN.LR0 = 0.01 # increase the learning rate when the backbone is frozen
21
+ cfg.TRAIN.LRF = 0.01
22
+ cfg.TRAIN.WARMUP_EPOCHS = 3.0 # shorter warmup
23
+
24
+ # Loss weights (adapted for YOLOv11)
25
+ cfg.LOSS.MULTI_HEAD_LAMBDA = [1.0, 1.0, 1.0, 0.5, 0.5, 0.8]
26
+ cfg.LOSS.BOX_GAIN = 7.5
27
+ cfg.LOSS.CLS_GAIN = 0.5
28
+ cfg.LOSS.DA_SEG_GAIN = 0.5
29
+ cfg.LOSS.LL_SEG_GAIN = 0.5
30
+ cfg.LOSS.LL_IOU_GAIN = 1.0
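Since this module mutates the shared default `cfg` in place when imported, a minimal sketch of picking up the YOLOv11 settings could look like this (assuming the training script is pointed at this config):

```python
# Importing the module applies the YOLOv11 overrides to the shared default cfg.
from lib.config.yolov11 import cfg

print(cfg.MODEL.USE_YOLOV11)         # True
print(cfg.MODEL.YOLOV11_SCALE)       # 's'
print(cfg.TRAIN.BATCH_SIZE_PER_GPU)  # 8
```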
lib/core/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .function import AverageMeter
lib/core/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (176 Bytes). View file
 
lib/core/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (168 Bytes). View file
 
lib/core/__pycache__/evaluate.cpython-310.pyc ADDED
Binary file (9.74 kB). View file
 
lib/core/__pycache__/evaluate.cpython-37.pyc ADDED
Binary file (9.76 kB). View file
 
lib/core/__pycache__/function.cpython-310.pyc ADDED
Binary file (14.8 kB). View file
 
lib/core/__pycache__/function.cpython-37.pyc ADDED
Binary file (14.7 kB). View file
 
lib/core/__pycache__/general.cpython-310.pyc ADDED
Binary file (14.2 kB). View file
 
lib/core/__pycache__/general.cpython-37.pyc ADDED
Binary file (14.4 kB). View file
 
lib/core/__pycache__/loss.cpython-310.pyc ADDED
Binary file (6.54 kB). View file
 
lib/core/__pycache__/postprocess.cpython-310.pyc ADDED
Binary file (5.73 kB). View file
 
lib/core/__pycache__/postprocess.cpython-37.pyc ADDED
Binary file (5.79 kB). View file
 
lib/core/activations.py ADDED
@@ -0,0 +1,72 @@
1
+ # Activation functions
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ # Swish https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------
9
+ class Swish(nn.Module): #
10
+ @staticmethod
11
+ def forward(x):
12
+ return x * torch.sigmoid(x)
13
+
14
+
15
+ class Hardswish(nn.Module): # export-friendly version of nn.Hardswish()
16
+ @staticmethod
17
+ def forward(x):
18
+ # return x * F.hardsigmoid(x) # for torchscript and CoreML
19
+ return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX
20
+
21
+
22
+ class MemoryEfficientSwish(nn.Module):
23
+ class F(torch.autograd.Function):
24
+ @staticmethod
25
+ def forward(ctx, x):
26
+ ctx.save_for_backward(x)
27
+ return x * torch.sigmoid(x)
28
+
29
+ @staticmethod
30
+ def backward(ctx, grad_output):
31
+ x = ctx.saved_tensors[0]
32
+ sx = torch.sigmoid(x)
33
+ return grad_output * (sx * (1 + x * (1 - sx)))
34
+
35
+ def forward(self, x):
36
+ return self.F.apply(x)
37
+
38
+
39
+ # Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
40
+ class Mish(nn.Module):
41
+ @staticmethod
42
+ def forward(x):
43
+ return x * F.softplus(x).tanh()
44
+
45
+
46
+ class MemoryEfficientMish(nn.Module):
47
+ class F(torch.autograd.Function):
48
+ @staticmethod
49
+ def forward(ctx, x):
50
+ ctx.save_for_backward(x)
51
+ return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
52
+
53
+ @staticmethod
54
+ def backward(ctx, grad_output):
55
+ x = ctx.saved_tensors[0]
56
+ sx = torch.sigmoid(x)
57
+ fx = F.softplus(x).tanh()
58
+ return grad_output * (fx + x * sx * (1 - fx * fx))
59
+
60
+ def forward(self, x):
61
+ return self.F.apply(x)
62
+
63
+
64
+ # FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
65
+ class FReLU(nn.Module):
66
+ def __init__(self, c1, k=3): # ch_in, kernel
67
+ super().__init__()
68
+ self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
69
+ self.bn = nn.BatchNorm2d(c1)
70
+
71
+ def forward(self, x):
72
+ return torch.max(x, self.bn(self.conv(x)))
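A minimal sketch of using these activations directly; the input shape is arbitrary, and `FReLU` needs the channel count of its input:

```python
import torch
from lib.core.activations import Hardswish, MemoryEfficientMish, FReLU

x = torch.randn(1, 16, 32, 32)

print(Hardswish()(x).shape)             # export-friendly Hardswish, same shape as input
print(MemoryEfficientMish()(x).shape)   # Mish with a custom, memory-efficient backward
print(FReLU(16)(x).shape)               # FReLU (funnel activation), needs c1=16 channels
```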
lib/core/evaluate.py ADDED
@@ -0,0 +1,278 @@
1
+ # Model validation metrics
2
+
3
+ from pathlib import Path
4
+
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ import torch
8
+
9
+ from . import general
10
+
11
+
12
+ def fitness(x):
13
+ # Model fitness as a weighted combination of metrics
14
+ w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
15
+ return (x[:, :4] * w).sum(1)
16
+
17
+
18
+ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]):
19
+ """ Compute the average precision, given the recall and precision curves.
20
+ Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
21
+ # Arguments
22
+ tp: True positives (nparray, nx1 or nx10).
23
+ conf: Objectness value from 0-1 (nparray).
24
+ pred_cls: Predicted object classes (nparray).
25
+ target_cls: True object classes (nparray).
26
+ plot: Plot precision-recall curve at mAP@0.5
27
+ save_dir: Plot save directory
28
+ # Returns
29
+ The average precision as computed in py-faster-rcnn.
30
+ """
31
+
32
+ # Sort by objectness
33
+ i = np.argsort(-conf) # sorted index from big to small
34
+ tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
35
+
36
+ # Find unique classes, each number just showed up once
37
+ unique_classes = np.unique(target_cls)
38
+
39
+ # Create Precision-Recall curve and compute AP for each class
40
+ px, py = np.linspace(0, 1, 1000), [] # for plotting
41
+ pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
42
+ s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
43
+ ap, p, r = np.zeros(s), np.zeros((unique_classes.shape[0], 1000)), np.zeros((unique_classes.shape[0], 1000))
44
+ for ci, c in enumerate(unique_classes):
45
+ i = pred_cls == c
46
+ n_l = (target_cls == c).sum() # number of labels
47
+ n_p = i.sum() # number of predictions
48
+
49
+ if n_p == 0 or n_l == 0:
50
+ continue
51
+ else:
52
+ # Accumulate FPs and TPs
53
+ fpc = (1 - tp[i]).cumsum(0)
54
+ tpc = tp[i].cumsum(0)
55
+
56
+ # Recall
57
+ recall = tpc / (n_l + 1e-16) # recall curve
58
+ r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # r at pr_score, negative x, xp because xp decreases
59
+
60
+ # Precision
61
+ precision = tpc / (tpc + fpc) # precision curve
62
+ p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
63
+
64
+ # AP from recall-precision curve
65
+ for j in range(tp.shape[1]):
66
+ ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
67
+ if plot and (j == 0):
68
+ py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
69
+
70
+ # Compute F1 score (harmonic mean of precision and recall)
71
+ f1 = 2 * p * r / (p + r + 1e-16)
72
+ i = r.mean(0).argmax()
73
+
74
+ if plot:
75
+ plot_pr_curve(px, py, ap, save_dir, names)
76
+
77
+ return p[:, i], r[:, i], ap, f1, unique_classes.astype('int32')
78
+
79
+
80
+ def compute_ap(recall, precision):
81
+ """ Compute the average precision, given the recall and precision curves
82
+ # Arguments
83
+ recall: The recall curve (list)
84
+ precision: The precision curve (list)
85
+ # Returns
86
+ Average precision, precision curve, recall curve
87
+ """
88
+
89
+ # Append sentinel values to beginning and end
90
+ mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01]))
91
+ mpre = np.concatenate(([1.], precision, [0.]))
92
+
93
+ # Compute the precision envelope
94
+ mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
95
+
96
+ # Integrate area under curve
97
+ method = 'interp' # methods: 'continuous', 'interp'
98
+ if method == 'interp':
99
+ x = np.linspace(0, 1, 101) # 101-point interp (COCO)
100
+ ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
101
+ else: # 'continuous'
102
+ i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
103
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
104
+
105
+ return ap, mpre, mrec
106
+
107
+
108
+ class ConfusionMatrix:
109
+ # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
110
+ def __init__(self, nc, conf=0.25, iou_thres=0.45):
111
+ self.matrix = np.zeros((nc + 1, nc + 1))
112
+ self.nc = nc # number of classes
113
+ self.conf = conf
114
+ self.iou_thres = iou_thres
115
+
116
+ def process_batch(self, detections, labels):
117
+ """
118
+ Return intersection-over-union (Jaccard index) of boxes.
119
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
120
+ Arguments:
121
+ detections (Array[N, 6]), x1, y1, x2, y2, conf, class
122
+ labels (Array[M, 5]), class, x1, y1, x2, y2
123
+ Returns:
124
+ None, updates confusion matrix accordingly
125
+ """
126
+ detections = detections[detections[:, 4] > self.conf]
127
+ gt_classes = labels[:, 0].int()
128
+ detection_classes = detections[:, 5].int()
129
+ iou = general.box_iou(labels[:, 1:], detections[:, :4])
130
+
131
+ x = torch.where(iou > self.iou_thres)
132
+ if x[0].shape[0]:
133
+ matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
134
+ if x[0].shape[0] > 1:
135
+ matches = matches[matches[:, 2].argsort()[::-1]]
136
+ matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
137
+ matches = matches[matches[:, 2].argsort()[::-1]]
138
+ matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
139
+ else:
140
+ matches = np.zeros((0, 3))
141
+
142
+ n = matches.shape[0] > 0
143
+ m0, m1, _ = matches.transpose().astype(np.int16)
144
+ for i, gc in enumerate(gt_classes):
145
+ j = m0 == i
146
+ if n and sum(j) == 1:
147
+ self.matrix[gc, detection_classes[m1[j]]] += 1 # correct
148
+ else:
149
+ self.matrix[gc, self.nc] += 1 # background FP
150
+
151
+ if n:
152
+ for i, dc in enumerate(detection_classes):
153
+ if not any(m1 == i):
154
+ self.matrix[self.nc, dc] += 1 # background FN
155
+
156
+ def matrix(self):
157
+ return self.matrix
158
+
159
+ def plot(self, save_dir='', names=()):
160
+ try:
161
+ import seaborn as sn
162
+
163
+ array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize
164
+ array[array < 0.005] = np.nan # don't annotate (would appear as 0.00)
165
+
166
+ fig = plt.figure(figsize=(12, 9), tight_layout=True)
167
+ sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size
168
+ labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels
169
+ sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True,
170
+ xticklabels=names + ['background FN'] if labels else "auto",
171
+ yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1))
172
+ fig.axes[0].set_xlabel('True')
173
+ fig.axes[0].set_ylabel('Predicted')
174
+ fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
175
+ except Exception as e:
176
+ pass
177
+
178
+ def print(self):
179
+ for i in range(self.nc + 1):
180
+ print(' '.join(map(str, self.matrix[i])))
181
+
182
+ class SegmentationMetric(object):
183
+ '''
184
+ imgLabel [batch_size, height(144), width(256)]
185
+ confusionMatrix [[0(TN),1(FP)],
186
+ [2(FN),3(TP)]]
187
+ '''
188
+ def __init__(self, numClass):
189
+ self.numClass = numClass
190
+ self.confusionMatrix = np.zeros((self.numClass,)*2)
191
+
192
+ def pixelAccuracy(self):
193
+ # return all class overall pixel accuracy
194
+ # acc = (TP + TN) / (TP + TN + FP + FN)
195
+ acc = np.diag(self.confusionMatrix).sum() / self.confusionMatrix.sum()
196
+ return acc
197
+
198
+ def lineAccuracy(self):
199
+ Acc = np.diag(self.confusionMatrix) / (self.confusionMatrix.sum(axis=1) + 1e-12)
200
+ return Acc[1]
201
+
202
+ def classPixelAccuracy(self):
203
+ # return each category's pixel accuracy (more precisely, this is per-class precision)
204
+ # acc = TP / (TP + FP)
205
+ classAcc = np.diag(self.confusionMatrix) / (self.confusionMatrix.sum(axis=0) + 1e-12)
206
+ return classAcc
207
+
208
+ def meanPixelAccuracy(self):
209
+ classAcc = self.classPixelAccuracy()
210
+ meanAcc = np.nanmean(classAcc)
211
+ return meanAcc
212
+
213
+ def meanIntersectionOverUnion(self):
214
+ # Intersection = TP Union = TP + FP + FN
215
+ # IoU = TP / (TP + FP + FN)
216
+ intersection = np.diag(self.confusionMatrix)
217
+ union = np.sum(self.confusionMatrix, axis=1) + np.sum(self.confusionMatrix, axis=0) - np.diag(self.confusionMatrix)
218
+ IoU = intersection / union
219
+ IoU[np.isnan(IoU)] = 0
220
+ mIoU = np.nanmean(IoU)
221
+ return mIoU
222
+
223
+ def IntersectionOverUnion(self):
224
+ intersection = np.diag(self.confusionMatrix)
225
+ union = np.sum(self.confusionMatrix, axis=1) + np.sum(self.confusionMatrix, axis=0) - np.diag(self.confusionMatrix)
226
+ IoU = intersection / union
227
+ IoU[np.isnan(IoU)] = 0
228
+ return IoU[1]
229
+
230
+ def genConfusionMatrix(self, imgPredict, imgLabel):
231
+ # remove classes from unlabeled pixels in gt image and predict
232
+ # print(imgLabel.shape)
233
+ mask = (imgLabel >= 0) & (imgLabel < self.numClass)
234
+ label = self.numClass * imgLabel[mask] + imgPredict[mask]
235
+ count = np.bincount(label, minlength=self.numClass**2)
236
+ confusionMatrix = count.reshape(self.numClass, self.numClass)
237
+ return confusionMatrix
238
+
239
+ def Frequency_Weighted_Intersection_over_Union(self):
240
+ # FWIoU = [(TP+FN)/(TP+FP+TN+FN)] * [TP / (TP + FP + FN)]
241
+ freq = np.sum(self.confusionMatrix, axis=1) / np.sum(self.confusionMatrix)
242
+ iu = np.diag(self.confusionMatrix) / (
243
+ np.sum(self.confusionMatrix, axis=1) + np.sum(self.confusionMatrix, axis=0) -
244
+ np.diag(self.confusionMatrix))
245
+ FWIoU = (freq[freq > 0] * iu[freq > 0]).sum()
246
+ return FWIoU
247
+
248
+
249
+ def addBatch(self, imgPredict, imgLabel):
250
+ assert imgPredict.shape == imgLabel.shape
251
+ self.confusionMatrix += self.genConfusionMatrix(imgPredict, imgLabel)
252
+
253
+ def reset(self):
254
+ self.confusionMatrix = np.zeros((self.numClass, self.numClass))
255
+
256
+
257
+
258
+
259
+
260
+ # Plots ----------------------------------------------------------------------------------------------------------------
261
+
262
+ def plot_pr_curve(px, py, ap, save_dir='.', names=()):
263
+ fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
264
+ py = np.stack(py, axis=1)
265
+
266
+ if 0 < len(names) < 21: # show mAP in legend if < 21 classes
267
+ for i, y in enumerate(py.T):
268
+ ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, precision)
269
+ else:
270
+ ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision)
271
+
272
+ ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
273
+ ax.set_xlabel('Recall')
274
+ ax.set_ylabel('Precision')
275
+ ax.set_xlim(0, 1)
276
+ ax.set_ylim(0, 1)
277
+ plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
278
+ fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250)
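
As a quick, hedged illustration of the segmentation metrics defined above, the sketch below pushes a toy 2-class prediction/label pair through SegmentationMetric; the arrays and the resulting numbers are made up purely for the example:

    import numpy as np
    from lib.core.evaluate import SegmentationMetric

    metric = SegmentationMetric(2)             # class 0 = background, class 1 = foreground
    pred = np.array([[0, 1, 1],
                     [0, 0, 1]])
    label = np.array([[0, 1, 0],
                      [0, 1, 1]])
    metric.addBatch(pred, label)               # accumulates the 2x2 confusion matrix
    print(metric.pixelAccuracy())              # (TP + TN) / total = 4/6
    print(metric.IntersectionOverUnion())      # class-1 IoU = TP / (TP + FP + FN) = 2/4
    print(metric.meanIntersectionOverUnion())  # mean IoU over both classes = 0.5
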
lib/core/function.py ADDED
@@ -0,0 +1,510 @@
1
+ import time
2
+ from lib.core.evaluate import ConfusionMatrix,SegmentationMetric
3
+ from lib.core.general import non_max_suppression,check_img_size,scale_coords,xyxy2xywh,xywh2xyxy,box_iou,coco80_to_coco91_class,plot_images,ap_per_class,output_to_target
4
+ from lib.utils.utils import time_synchronized
5
+ from lib.utils import plot_img_and_mask,plot_one_box,show_seg_result
6
+ import torch
7
+ from threading import Thread
8
+ import numpy as np
9
+ from PIL import Image
10
+ from torchvision import transforms
11
+ from pathlib import Path
12
+ import json
13
+ import random
14
+ import cv2
15
+ import os
16
+ import math
17
+ from torch.cuda import amp
18
+ from tqdm import tqdm
19
+
20
+
21
+ def train(cfg, train_loader, model, criterion, optimizer, scaler, epoch, num_batch, num_warmup,
22
+ writer_dict, logger, device, rank=-1):
23
+ """
24
+ train for one epoch
25
+
26
+ Inputs:
27
+ - config: configurations
28
+ - train_loader: loader for data
29
+ - model:
30
+ - criterion: (function) calculate all the loss, return total_loss, head_losses
31
+ - writer_dict:
32
+ outputs(3,)
33
+ output[0] len:3, [1,3,32,32,85], [1,3,16,16,85], [1,3,8,8,85]
34
+ output[1] len:1, [2,256,256]
35
+ output[2] len:1, [2,256,256]
36
+ target(3,)
37
+ target[0] [1,n,5]
38
+ target[1] [2,256,256]
39
+ target[2] [2,256,256]
40
+ Returns:
41
+ None
42
+
43
+ """
44
+ batch_time = AverageMeter()
45
+ data_time = AverageMeter()
46
+ losses = AverageMeter()
47
+
48
+ # switch to train mode
49
+ model.train()
50
+ start = time.time()
51
+ for i, (input, target, paths, shapes) in enumerate(train_loader):
52
+ intermediate = time.time()
53
+ #print('tims:{}'.format(intermediate-start))
54
+ num_iter = i + num_batch * (epoch - 1)
55
+
56
+ if num_iter < num_warmup:
57
+ # warm up
58
+ lf = lambda x: ((1 + math.cos(x * math.pi / cfg.TRAIN.END_EPOCH)) / 2) * \
59
+ (1 - cfg.TRAIN.LRF) + cfg.TRAIN.LRF # cosine
60
+ xi = [0, num_warmup]
61
+ # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou)
62
+ for j, x in enumerate(optimizer.param_groups):
63
+ # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
64
+ x['lr'] = np.interp(num_iter, xi, [cfg.TRAIN.WARMUP_BIASE_LR if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
65
+ if 'momentum' in x:
66
+ x['momentum'] = np.interp(num_iter, xi, [cfg.TRAIN.WARMUP_MOMENTUM, cfg.TRAIN.MOMENTUM])
67
+
68
+ data_time.update(time.time() - start)
69
+ if not cfg.DEBUG:
70
+ input = input.to(device, non_blocking=True)
71
+ assign_target = []
72
+ for tgt in target:
73
+ assign_target.append(tgt.to(device))
74
+ target = assign_target
75
+ with amp.autocast(enabled=device.type != 'cpu'):
76
+ outputs = model(input)
77
+ total_loss, head_losses = criterion(outputs, target, shapes,model)
78
+ # print(head_losses)
79
+
80
+ # compute gradient and do update step
81
+ optimizer.zero_grad()
82
+ scaler.scale(total_loss).backward()
83
+ scaler.step(optimizer)
84
+ scaler.update()
85
+
86
+ if rank in [-1, 0]:
87
+ # measure accuracy and record loss
88
+ losses.update(total_loss.item(), input.size(0))
89
+
90
+ # _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
91
+ # target.detach().cpu().numpy())
92
+ # acc.update(avg_acc, cnt)
93
+
94
+ # measure elapsed time
95
+ batch_time.update(time.time() - start)
96
+ end = time.time()
97
+ if i % cfg.PRINT_FREQ == 0:
98
+ msg = 'Epoch: [{0}][{1}/{2}]\t' \
99
+ 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
100
+ 'Speed {speed:.1f} samples/s\t' \
101
+ 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
102
+ 'Loss {loss.val:.5f} ({loss.avg:.5f})'.format(
103
+ epoch, i, len(train_loader), batch_time=batch_time,
104
+ speed=input.size(0)/batch_time.val,
105
+ data_time=data_time, loss=losses)
106
+ logger.info(msg)
107
+
108
+ writer = writer_dict['writer']
109
+ global_steps = writer_dict['train_global_steps']
110
+ writer.add_scalar('train_loss', losses.val, global_steps)
111
+ # writer.add_scalar('train_acc', acc.val, global_steps)
112
+ writer_dict['train_global_steps'] = global_steps + 1
113
+
114
+
115
+ def validate(epoch,config, val_loader, val_dataset, model, criterion, output_dir,
116
+ tb_log_dir, writer_dict=None, logger=None, device='cpu', rank=-1):
117
+ """
118
+ validata
119
+
120
+ Inputs:
121
+ - config: configurations
122
+ - train_loader: loder for data
123
+ - model:
124
+ - criterion: (function) calculate all the loss, return
125
+ - writer_dict:
126
+
127
+ Return:
128
+ None
129
+ """
130
+ # setting
131
+ max_stride = 32
132
+ weights = None
133
+
134
+ save_dir = output_dir + os.path.sep + 'visualization'
135
+ if not os.path.exists(save_dir):
136
+ os.mkdir(save_dir)
137
+
138
+ # print(save_dir)
139
+ _, imgsz = [check_img_size(x, s=max_stride) for x in config.MODEL.IMAGE_SIZE] #imgsz is multiple of max_stride
140
+ batch_size = config.TRAIN.BATCH_SIZE_PER_GPU * len(config.GPUS)
141
+ test_batch_size = config.TEST.BATCH_SIZE_PER_GPU * len(config.GPUS)
142
+ training = False
143
+ is_coco = False #is coco dataset
144
+ save_conf=False # save auto-label confidences
145
+ verbose=False
146
+ save_hybrid=False
147
+ log_imgs,wandb = min(16,100), None
148
+
149
+ nc = 1
150
+ iouv = torch.linspace(0.5,0.95,10).to(device) #iou vector for mAP@0.5:0.95
151
+ niou = iouv.numel()
152
+
153
+ try:
154
+ import wandb
155
+ except ImportError:
156
+ wandb = None
157
+ log_imgs = 0
158
+
159
+ seen = 0
160
+ confusion_matrix = ConfusionMatrix(nc=model.nc) #detector confusion matrix
161
+ da_metric = SegmentationMetric(config.num_seg_class) #segment confusion matrix
162
+ ll_metric = SegmentationMetric(2) #segment confusion matrix
163
+
164
+ names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
165
+ colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
166
+ coco91class = coco80_to_coco91_class()
167
+
168
+ s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
169
+ p, r, f1, mp, mr, map50, map, t_inf, t_nms = 0., 0., 0., 0., 0., 0., 0., 0., 0.
170
+
171
+ losses = AverageMeter()
172
+
173
+ da_acc_seg = AverageMeter()
174
+ da_IoU_seg = AverageMeter()
175
+ da_mIoU_seg = AverageMeter()
176
+
177
+ ll_acc_seg = AverageMeter()
178
+ ll_IoU_seg = AverageMeter()
179
+ ll_mIoU_seg = AverageMeter()
180
+
181
+ T_inf = AverageMeter()
182
+ T_nms = AverageMeter()
183
+
184
+ # switch to evaluate mode
185
+ model.eval()
186
+ jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
187
+
188
+ for batch_i, (img, target, paths, shapes) in tqdm(enumerate(val_loader), total=len(val_loader)):
189
+ if not config.DEBUG:
190
+ img = img.to(device, non_blocking=True)
191
+ assign_target = []
192
+ for tgt in target:
193
+ assign_target.append(tgt.to(device))
194
+ target = assign_target
195
+ nb, _, height, width = img.shape #batch size, channel, height, width
196
+
197
+ with torch.no_grad():
198
+ pad_w, pad_h = shapes[0][1][1]
199
+ pad_w = int(pad_w)
200
+ pad_h = int(pad_h)
201
+ ratio = shapes[0][1][0][0]
202
+
203
+ t = time_synchronized()
204
+ det_out, da_seg_out, ll_seg_out= model(img)
205
+ t_inf = time_synchronized() - t
206
+ if batch_i > 0:
207
+ T_inf.update(t_inf/img.size(0),img.size(0))
208
+
209
+ inf_out,train_out = det_out
210
+
211
+ #driving area segment evaluation
212
+ _,da_predict=torch.max(da_seg_out, 1)
213
+ _,da_gt=torch.max(target[1], 1)
214
+ da_predict = da_predict[:, pad_h:height-pad_h, pad_w:width-pad_w]
215
+ da_gt = da_gt[:, pad_h:height-pad_h, pad_w:width-pad_w]
216
+
217
+ da_metric.reset()
218
+ da_metric.addBatch(da_predict.cpu(), da_gt.cpu())
219
+ da_acc = da_metric.pixelAccuracy()
220
+ da_IoU = da_metric.IntersectionOverUnion()
221
+ da_mIoU = da_metric.meanIntersectionOverUnion()
222
+
223
+ da_acc_seg.update(da_acc,img.size(0))
224
+ da_IoU_seg.update(da_IoU,img.size(0))
225
+ da_mIoU_seg.update(da_mIoU,img.size(0))
226
+
227
+ #lane line segment evaluation
228
+ _,ll_predict=torch.max(ll_seg_out, 1)
229
+ _,ll_gt=torch.max(target[2], 1)
230
+ ll_predict = ll_predict[:, pad_h:height-pad_h, pad_w:width-pad_w]
231
+ ll_gt = ll_gt[:, pad_h:height-pad_h, pad_w:width-pad_w]
232
+
233
+ ll_metric.reset()
234
+ ll_metric.addBatch(ll_predict.cpu(), ll_gt.cpu())
235
+ ll_acc = ll_metric.lineAccuracy()
236
+ ll_IoU = ll_metric.IntersectionOverUnion()
237
+ ll_mIoU = ll_metric.meanIntersectionOverUnion()
238
+
239
+ ll_acc_seg.update(ll_acc,img.size(0))
240
+ ll_IoU_seg.update(ll_IoU,img.size(0))
241
+ ll_mIoU_seg.update(ll_mIoU,img.size(0))
242
+
243
+ total_loss, head_losses = criterion((train_out,da_seg_out, ll_seg_out), target, shapes,model) #Compute loss
244
+ losses.update(total_loss.item(), img.size(0))
245
+
246
+ #NMS
247
+ t = time_synchronized()
248
+ target[0][:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels
249
+ lb = [target[0][target[0][:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
250
+ output = non_max_suppression(inf_out, conf_thres= config.TEST.NMS_CONF_THRESHOLD, iou_thres=config.TEST.NMS_IOU_THRESHOLD, labels=lb)
251
+ #output = non_max_suppression(inf_out, conf_thres=0.001, iou_thres=0.6)
252
+ #output = non_max_suppression(inf_out, conf_thres=config.TEST.NMS_CONF_THRES, iou_thres=config.TEST.NMS_IOU_THRES)
253
+ t_nms = time_synchronized() - t
254
+ if batch_i > 0:
255
+ T_nms.update(t_nms/img.size(0),img.size(0))
256
+
257
+ if config.TEST.PLOTS:
258
+ if batch_i == 0:
259
+ for i in range(test_batch_size):
260
+ img_test = cv2.imread(paths[i])
261
+ da_seg_mask = da_seg_out[i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
262
+ da_seg_mask = torch.nn.functional.interpolate(da_seg_mask, scale_factor=int(1/ratio), mode='bilinear')
263
+ _, da_seg_mask = torch.max(da_seg_mask, 1)
264
+
265
+ da_gt_mask = target[1][i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
266
+ da_gt_mask = torch.nn.functional.interpolate(da_gt_mask, scale_factor=int(1/ratio), mode='bilinear')
267
+ _, da_gt_mask = torch.max(da_gt_mask, 1)
268
+
269
+ da_seg_mask = da_seg_mask.int().squeeze().cpu().numpy()
270
+ da_gt_mask = da_gt_mask.int().squeeze().cpu().numpy()
271
+ # seg_mask = seg_mask > 0.5
272
+ # plot_img_and_mask(img_test, seg_mask, i,epoch,save_dir)
273
+ img_test1 = img_test.copy()
274
+ _ = show_seg_result(img_test, da_seg_mask, i,epoch,save_dir)
275
+ _ = show_seg_result(img_test1, da_gt_mask, i, epoch, save_dir, is_gt=True)
276
+
277
+ img_ll = cv2.imread(paths[i])
278
+ ll_seg_mask = ll_seg_out[i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
279
+ ll_seg_mask = torch.nn.functional.interpolate(ll_seg_mask, scale_factor=int(1/ratio), mode='bilinear')
280
+ _, ll_seg_mask = torch.max(ll_seg_mask, 1)
281
+
282
+ ll_gt_mask = target[2][i][:, pad_h:height-pad_h, pad_w:width-pad_w].unsqueeze(0)
283
+ ll_gt_mask = torch.nn.functional.interpolate(ll_gt_mask, scale_factor=int(1/ratio), mode='bilinear')
284
+ _, ll_gt_mask = torch.max(ll_gt_mask, 1)
285
+
286
+ ll_seg_mask = ll_seg_mask.int().squeeze().cpu().numpy()
287
+ ll_gt_mask = ll_gt_mask.int().squeeze().cpu().numpy()
288
+ # seg_mask = seg_mask > 0.5
289
+ # plot_img_and_mask(img_test, seg_mask, i,epoch,save_dir)
290
+ img_ll1 = img_ll.copy()
291
+ _ = show_seg_result(img_ll, ll_seg_mask, i,epoch,save_dir, is_ll=True)
292
+ _ = show_seg_result(img_ll1, ll_gt_mask, i, epoch, save_dir, is_ll=True, is_gt=True)
293
+
294
+ img_det = cv2.imread(paths[i])
295
+ img_gt = img_det.copy()
296
+ det = output[i].clone()
297
+ if len(det):
298
+ det[:,:4] = scale_coords(img[i].shape[1:],det[:,:4],img_det.shape).round()
299
+ for *xyxy,conf,cls in reversed(det):
300
+ #print(cls)
301
+ label_det_pred = f'{names[int(cls)]} {conf:.2f}'
302
+ plot_one_box(xyxy, img_det , label=label_det_pred, color=colors[int(cls)], line_thickness=3)
303
+ cv2.imwrite(save_dir+"/batch_{}_{}_det_pred.png".format(epoch,i),img_det)
304
+
305
+ labels = target[0][target[0][:, 0] == i, 1:]
306
+ # print(labels)
307
+ labels[:,1:5]=xywh2xyxy(labels[:,1:5])
308
+ if len(labels):
309
+ labels[:,1:5]=scale_coords(img[i].shape[1:],labels[:,1:5],img_gt.shape).round()
310
+ for cls,x1,y1,x2,y2 in labels:
311
+ #print(names)
312
+ #print(cls)
313
+ label_det_gt = f'{names[int(cls)]}'
314
+ xyxy = (x1,y1,x2,y2)
315
+ plot_one_box(xyxy, img_gt , label=label_det_gt, color=colors[int(cls)], line_thickness=3)
316
+ cv2.imwrite(save_dir+"/batch_{}_{}_det_gt.png".format(epoch,i),img_gt)
317
+
318
+ # Statistics per image
319
+ # output([xyxy,conf,cls])
320
+ # target[0] ([img_id,cls,xyxy])
321
+ for si, pred in enumerate(output):
322
+ labels = target[0][target[0][:, 0] == si, 1:] #all object in one image
323
+ nl = len(labels) # num of object
324
+ tcls = labels[:, 0].tolist() if nl else [] # target class
325
+ path = Path(paths[si])
326
+ seen += 1
327
+
328
+ if len(pred) == 0:
329
+ if nl:
330
+ stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
331
+ continue
332
+
333
+ # Predictions
334
+ predn = pred.clone()
335
+ scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred
336
+
337
+ # Append to text file
338
+ if config.TEST.SAVE_TXT:
339
+ gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh
340
+ for *xyxy, conf, cls in predn.tolist():
341
+ xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
342
+ line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
343
+ with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
344
+ f.write(('%g ' * len(line)).rstrip() % line + '\n')
345
+
346
+ # W&B logging
347
+ if config.TEST.PLOTS and len(wandb_images) < log_imgs:
348
+ box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
349
+ "class_id": int(cls),
350
+ "box_caption": "%s %.3f" % (names[cls], conf),
351
+ "scores": {"class_score": conf},
352
+ "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
353
+ boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
354
+ wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name))
355
+
356
+ # Append to pycocotools JSON dictionary
357
+ if config.TEST.SAVE_JSON:
358
+ # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
359
+ image_id = int(path.stem) if path.stem.isnumeric() else path.stem
360
+ box = xyxy2xywh(predn[:, :4]) # xywh
361
+ box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
362
+ for p, b in zip(pred.tolist(), box.tolist()):
363
+ jdict.append({'image_id': image_id,
364
+ 'category_id': coco91class[int(p[5])] if is_coco else int(p[5]),
365
+ 'bbox': [round(x, 3) for x in b],
366
+ 'score': round(p[4], 5)})
367
+
368
+
369
+ # Assign all predictions as incorrect
370
+ correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
371
+ if nl:
372
+ detected = [] # target indices
373
+ tcls_tensor = labels[:, 0]
374
+
375
+ # target boxes
376
+ tbox = xywh2xyxy(labels[:, 1:5])
377
+ scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels
378
+ if config.TEST.PLOTS:
379
+ confusion_matrix.process_batch(pred, torch.cat((labels[:, 0:1], tbox), 1))
380
+
381
+ # Per target class
382
+ for cls in torch.unique(tcls_tensor):
383
+ ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # target indices
384
+ pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # prediction indices
385
+
386
+ # Search for detections
387
+ if pi.shape[0]:
388
+ # Prediction to target ious
389
+ # n*m n:pred m:label
390
+ ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1) # best ious, indices
391
+ # Append detections
392
+ detected_set = set()
393
+ for j in (ious > iouv[0]).nonzero(as_tuple=False):
394
+ d = ti[i[j]] # detected target
395
+ if d.item() not in detected_set:
396
+ detected_set.add(d.item())
397
+ detected.append(d)
398
+ correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn
399
+ if len(detected) == nl: # all targets already located in image
400
+ break
401
+
402
+ # Append statistics (correct, conf, pcls, tcls)
403
+ stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
404
+
405
+ if config.TEST.PLOTS and batch_i < 3:
406
+ f = save_dir +'/'+ f'test_batch{batch_i}_labels.jpg' # labels
407
+ #Thread(target=plot_images, args=(img, target[0], paths, f, names), daemon=True).start()
408
+ f = save_dir +'/'+ f'test_batch{batch_i}_pred.jpg' # predictions
409
+ #Thread(target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True).start()
410
+
411
+ # Compute statistics
412
+ # stats : [[all_img_correct]...[all_img_tcls]]
413
+ stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy zip(*) :unzip
414
+
415
+ map70 = None
416
+ map75 = None
417
+ if len(stats) and stats[0].any():
418
+ p, r, ap, f1, ap_class = ap_per_class(*stats, plot=False, save_dir=save_dir, names=names)
419
+ ap50, ap70, ap75,ap = ap[:, 0], ap[:,4], ap[:,5],ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95]
420
+ mp, mr, map50, map70, map75, map = p.mean(), r.mean(), ap50.mean(), ap70.mean(),ap75.mean(),ap.mean()
421
+ nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
422
+ else:
423
+ nt = torch.zeros(1)
424
+
425
+ # Print results
426
+ pf = '%20s' + '%12.3g' * 6 # print format
427
+ print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))
428
+ #print(map70)
429
+ #print(map75)
430
+
431
+ # Print results per class
432
+ if (verbose or (nc <= 20 and not training)) and nc > 1 and len(stats):
433
+ for i, c in enumerate(ap_class):
434
+ print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))
435
+
436
+ # Print speeds
437
+ t = tuple(x / seen * 1E3 for x in (t_inf, t_nms, t_inf + t_nms)) + (imgsz, imgsz, batch_size) # tuple
438
+ if not training:
439
+ print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)
440
+
441
+ # Plots
442
+ if config.TEST.PLOTS:
443
+ confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
444
+ if wandb and wandb.run:
445
+ wandb.log({"Images": wandb_images})
446
+ wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]})
447
+
448
+ # Save JSON
449
+ if config.TEST.SAVE_JSON and len(jdict):
450
+ w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights
451
+ anno_json = '../coco/annotations/instances_val2017.json' # annotations json
452
+ pred_json = str(save_dir / f"{w}_predictions.json") # predictions json
453
+ print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
454
+ with open(pred_json, 'w') as f:
455
+ json.dump(jdict, f)
456
+
457
+ try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
458
+ from pycocotools.coco import COCO
459
+ from pycocotools.cocoeval import COCOeval
460
+
461
+ anno = COCO(anno_json) # init annotations api
462
+ pred = anno.loadRes(pred_json) # init predictions api
463
+ eval = COCOeval(anno, pred, 'bbox')
464
+ if is_coco:
465
+ eval.params.imgIds = [int(Path(x).stem) for x in val_loader.dataset.img_files] # image IDs to evaluate
466
+ eval.evaluate()
467
+ eval.accumulate()
468
+ eval.summarize()
469
+ map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5)
470
+ except Exception as e:
471
+ print(f'pycocotools unable to run: {e}')
472
+
473
+ # Return results
474
+ if not training:
475
+ s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if config.TEST.SAVE_TXT else ''
476
+ print(f"Results saved to {save_dir}{s}")
477
+ model.float() # for training
478
+ maps = np.zeros(nc) + map
479
+ for i, c in enumerate(ap_class):
480
+ maps[c] = ap[i]
481
+
482
+ da_segment_result = (da_acc_seg.avg,da_IoU_seg.avg,da_mIoU_seg.avg)
483
+ ll_segment_result = (ll_acc_seg.avg,ll_IoU_seg.avg,ll_mIoU_seg.avg)
484
+
485
+ # print(da_segment_result)
486
+ # print(ll_segment_result)
487
+ detect_result = np.asarray([mp, mr, map50, map])
488
+ # print('mp:{},mr:{},map50:{},map:{}'.format(mp, mr, map50, map))
489
+ # print segment_result
490
+ t = [T_inf.avg, T_nms.avg]
491
+ return da_segment_result, ll_segment_result, detect_result, losses.avg, maps, t
492
+
493
+
494
+
495
+ class AverageMeter(object):
496
+ """Computes and stores the average and current value"""
497
+ def __init__(self):
498
+ self.reset()
499
+
500
+ def reset(self):
501
+ self.val = 0
502
+ self.avg = 0
503
+ self.sum = 0
504
+ self.count = 0
505
+
506
+ def update(self, val, n=1):
507
+ self.val = val
508
+ self.sum += val * n
509
+ self.count += n
510
+ self.avg = self.sum / self.count if self.count != 0 else 0
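
A small, hypothetical example of the AverageMeter bookkeeping used throughout train() and validate(); the numbers are arbitrary:

    from lib.core.function import AverageMeter

    losses = AverageMeter()
    losses.update(0.9, n=16)    # batch of 16 images with mean loss 0.9
    losses.update(0.7, n=16)
    print(losses.val)           # 0.7 -> value from the most recent batch
    print(losses.avg)           # 0.8 -> (0.9*16 + 0.7*16) / 32
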
lib/core/general.py ADDED
@@ -0,0 +1,466 @@
1
+ import glob
2
+ import logging
3
+ import os
4
+ import platform
5
+ import random
6
+ import re
7
+ import shutil
8
+ import subprocess
9
+ import time
10
+ import torchvision
11
+ from contextlib import contextmanager
12
+ from copy import copy
13
+ from pathlib import Path
14
+
15
+ import cv2
16
+ import math
17
+ import matplotlib
18
+ import matplotlib.pyplot as plt
19
+ import numpy as np
20
+ import torch
21
+ import torch.nn as nn
22
+ import yaml
23
+ from PIL import Image
24
+ from scipy.cluster.vq import kmeans
25
+ from scipy.signal import butter, filtfilt
26
+ from tqdm import tqdm
27
+
28
+
29
+ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9):
30
+ # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
31
+ box2 = box2.T
32
+
33
+ # Get the coordinates of bounding boxes
34
+ if x1y1x2y2: # x1, y1, x2, y2 = box1
35
+ b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
36
+ b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
37
+ else: # transform from xywh to xyxy
38
+ b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
39
+ b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
40
+ b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
41
+ b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
42
+
43
+ # Intersection area
44
+ inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
45
+ (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
46
+
47
+ # Union Area
48
+ w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
49
+ w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
50
+ union = w1 * h1 + w2 * h2 - inter + eps
51
+
52
+ iou = inter / union
53
+ if GIoU or DIoU or CIoU:
54
+ cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width
55
+ ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height
56
+ if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
57
+ c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared
58
+ rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
59
+ (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared
60
+ if DIoU:
61
+ return iou - rho2 / c2 # DIoU
62
+ elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
63
+ v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
64
+ with torch.no_grad():
65
+ alpha = v / ((1 + eps) - iou + v)
66
+ return iou - (rho2 / c2 + v * alpha) # CIoU
67
+ else: # GIoU https://arxiv.org/pdf/1902.09630.pdf
68
+ c_area = cw * ch + eps # convex area
69
+ return iou - (c_area - union) / c_area # GIoU
70
+ else:
71
+ return iou # IoU
72
+
73
+
74
+ def box_iou(box1, box2):
75
+ # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
76
+ """
77
+ Return intersection-over-union (Jaccard index) of boxes.
78
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
79
+ Arguments:
80
+ box1 (Tensor[N, 4])
81
+ box2 (Tensor[M, 4])
82
+ Returns:
83
+ iou (Tensor[N, M]): the NxM matrix containing the pairwise
84
+ IoU values for every element in boxes1 and boxes2
85
+ """
86
+
87
+ def box_area(box):
88
+ # box = 4xn
89
+ return (box[2] - box[0]) * (box[3] - box[1]) #(x2-x1)*(y2-y1)
90
+
91
+ area1 = box_area(box1.T)
92
+ area2 = box_area(box2.T)
93
+
94
+ # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
95
+ inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
96
+ return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
97
+
98
+ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()):
99
+ """Performs Non-Maximum Suppression (NMS) on inference results
100
+
101
+ Returns:
102
+ detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
103
+ """
104
+
105
+ nc = prediction.shape[2] - 5 # number of classes
106
+ xc = prediction[..., 4] > conf_thres # candidates
107
+
108
+ # Settings
109
+ min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
110
+ max_det = 300 # maximum number of detections per image
111
+ max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
112
+ time_limit = 10.0 # seconds to quit after
113
+ redundant = True # require redundant detections
114
+ multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
115
+ merge = False # use merge-NMS
116
+
117
+ t = time.time()
118
+ output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
119
+ for xi, x in enumerate(prediction): # image index, image inference
120
+ # Apply constraints
121
+ # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
122
+ x = x[xc[xi]] # confidence
123
+
124
+ # Cat apriori labels if autolabelling
125
+ if labels and len(labels[xi]):
126
+ l = labels[xi]
127
+ v = torch.zeros((len(l), nc + 5), device=x.device)
128
+ v[:, :4] = l[:, 1:5] # box
129
+ v[:, 4] = 1.0 # conf
130
+ v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls
131
+ x = torch.cat((x, v), 0)
132
+
133
+ # If none remain process next image
134
+ if not x.shape[0]:
135
+ continue
136
+
137
+ # Compute conf
138
+ x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
139
+
140
+ # Box (center x, center y, width, height) to (x1, y1, x2, y2)
141
+ box = xywh2xyxy(x[:, :4])
142
+
143
+ # Detections matrix nx6 (xyxy, conf, cls)
144
+ if multi_label:
145
+ i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
146
+ x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
147
+ else: # best class only
148
+ conf, j = x[:, 5:].max(1, keepdim=True)
149
+ x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
150
+
151
+ # Filter by class
152
+ if classes is not None:
153
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
154
+
155
+ # Apply finite constraint
156
+ # if not torch.isfinite(x).all():
157
+ # x = x[torch.isfinite(x).all(1)]
158
+
159
+ # Check shape
160
+ n = x.shape[0] # number of boxes
161
+ if not n: # no boxes
162
+ continue
163
+ elif n > max_nms: # excess boxes
164
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
165
+
166
+ # Batched NMS
167
+ c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
168
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
169
+ i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
170
+ if i.shape[0] > max_det: # limit detections
171
+ i = i[:max_det]
172
+ if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
173
+ # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
174
+ iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
175
+ weights = iou * scores[None] # box weights
176
+ x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
177
+ if redundant:
178
+ i = i[iou.sum(1) > 1] # require redundancy
179
+
180
+ output[xi] = x[i]
181
+ if (time.time() - t) > time_limit:
182
+ print(f'WARNING: NMS time limit {time_limit}s exceeded')
183
+ break # time limit exceeded
184
+
185
+ return output
186
+
187
+
188
+ def xywh2xyxy(x):
189
+ # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
190
+ y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
191
+ y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
192
+ y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
193
+ y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
194
+ y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
195
+ return y
196
+
197
+ def fitness(x):
198
+ # Returns fitness (for use with results.txt or evolve.txt)
199
+ w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
200
+ return (x[:, :4] * w).sum(1)
201
+
202
+ def check_img_size(img_size, s=32):
203
+ # Verify img_size is a multiple of stride s
204
+ new_size = make_divisible(img_size, int(s)) # ceil gs-multiple
205
+ if new_size != img_size:
206
+ print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
207
+ return new_size
208
+
209
+ def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
210
+ # Rescale coords (xyxy) from img1_shape to img0_shape
211
+ if ratio_pad is None: # calculate from img0_shape
212
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
213
+ pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
214
+ else:
215
+ gain = ratio_pad[0][0]
216
+ pad = ratio_pad[1]
217
+
218
+ coords[:, [0, 2]] -= pad[0] # x padding
219
+ coords[:, [1, 3]] -= pad[1] # y padding
220
+ coords[:, :4] /= gain
221
+ clip_coords(coords, img0_shape)
222
+ return coords
223
+
224
+ def clip_coords(boxes, img_shape):
225
+ # Clip bounding xyxy bounding boxes to image shape (height, width)
226
+ boxes[:, 0].clamp_(0, img_shape[1]) # x1
227
+ boxes[:, 1].clamp_(0, img_shape[0]) # y1
228
+ boxes[:, 2].clamp_(0, img_shape[1]) # x2
229
+ boxes[:, 3].clamp_(0, img_shape[0]) # y2
230
+
231
+ def make_divisible(x, divisor):
232
+ # Returns x evenly divisible by divisor
233
+ return math.ceil(x / divisor) * divisor
234
+
235
+ def xyxy2xywh(x):
236
+ # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
237
+ y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
238
+ y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
239
+ y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
240
+ y[:, 2] = x[:, 2] - x[:, 0] # width
241
+ y[:, 3] = x[:, 3] - x[:, 1] # height
242
+ return y
243
+
244
+ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16):
245
+ # Plot image grid with labels
246
+
247
+ if isinstance(images, torch.Tensor):
248
+ images = images.cpu().float().numpy()
249
+ if isinstance(targets, torch.Tensor):
250
+ targets = targets.cpu().numpy()
251
+
252
+ # un-normalise
253
+ if np.max(images[0]) <= 1:
254
+ images *= 255
255
+
256
+ tl = 3 # line thickness
257
+ tf = max(tl - 1, 1) # font thickness
258
+ bs, _, h, w = images.shape # batch size, _, height, width
259
+ bs = min(bs, max_subplots) # limit plot images
260
+ ns = np.ceil(bs ** 0.5) # number of subplots (square)
261
+
262
+ # Check if we should resize
263
+ scale_factor = max_size / max(h, w)
264
+ if scale_factor < 1:
265
+ h = math.ceil(scale_factor * h)
266
+ w = math.ceil(scale_factor * w)
267
+
268
+ colors = color_list() # list of colors
269
+ mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init
270
+ for i, img in enumerate(images):
271
+ if i == max_subplots: # if last batch has fewer images than we expect
272
+ break
273
+
274
+ block_x = int(w * (i // ns))
275
+ block_y = int(h * (i % ns))
276
+
277
+ img = img.transpose(1, 2, 0)
278
+ if scale_factor < 1:
279
+ img = cv2.resize(img, (w, h))
280
+
281
+ mosaic[block_y:block_y + h, block_x:block_x + w, :] = img
282
+ if len(targets) > 0:
283
+ image_targets = targets[targets[:, 0] == i]
284
+ boxes = xywh2xyxy(image_targets[:, 2:6]).T
285
+ classes = image_targets[:, 1].astype('int')
286
+ labels = image_targets.shape[1] == 6 # labels if no conf column
287
+ conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred)
288
+
289
+ if boxes.shape[1]:
290
+ if boxes.max() <= 1.01: # if normalized with tolerance 0.01
291
+ boxes[[0, 2]] *= w # scale to pixels
292
+ boxes[[1, 3]] *= h
293
+ elif scale_factor < 1: # absolute coords need scale if image scales
294
+ boxes *= scale_factor
295
+ boxes[[0, 2]] += block_x
296
+ boxes[[1, 3]] += block_y
297
+ for j, box in enumerate(boxes.T):
298
+ cls = int(classes[j])
299
+ color = colors[cls % len(colors)]
300
+ cls = names[cls] if names else cls
301
+ if labels or conf[j] > 0.25: # 0.25 conf thresh
302
+ label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j])
303
+ plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl)
304
+
305
+ # Draw image filename labels
306
+ if paths:
307
+ label = Path(paths[i]).name[:40] # trim to 40 char
308
+ t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
309
+ cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf,
310
+ lineType=cv2.LINE_AA)
311
+
312
+ # Image border
313
+ cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3)
314
+
315
+ if fname:
316
+ r = min(1280. / max(h, w) / ns, 1.0) # ratio to limit image size
317
+ mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA)
318
+ # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save
319
+ Image.fromarray(mosaic).save(fname) # PIL save
320
+ return mosaic
321
+
322
+ def plot_one_box(x, img, color=None, label=None, line_thickness=None):
323
+ # Plots one bounding box on image img
324
+ tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness
325
+ color = color or [random.randint(0, 255) for _ in range(3)]
326
+ c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
327
+ cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
328
+ if label:
329
+ tf = max(tl - 1, 1) # font thickness
330
+ t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
331
+ c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
332
+ cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled
333
+ cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
334
+
335
+ def color_list():
336
+ # Return first 10 plt colors as (r,g,b) https://stackoverflow.com/questions/51350872/python-from-color-name-to-rgb
337
+ def hex2rgb(h):
338
+ return tuple(int(str(h[1 + i:1 + i + 2]), 16) for i in (0, 2, 4))
339
+
340
+ return [hex2rgb(h) for h in plt.rcParams['axes.prop_cycle'].by_key()['color']]
341
+
342
+ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]):
343
+ """ Compute the average precision, given the recall and precision curves.
344
+ Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
345
+ # Arguments
346
+ tp: True positives (nparray, nx1 or nx10).
347
+ conf: Objectness value from 0-1 (nparray).
348
+ pred_cls: Predicted object classes (nparray).
349
+ target_cls: True object classes (nparray).
350
+ plot: Plot precision-recall curve at mAP@0.5
351
+ save_dir: Plot save directory
352
+ # Returns
353
+ The average precision as computed in py-faster-rcnn.
354
+ """
355
+
356
+ # Sort by objectness
357
+ i = np.argsort(-conf)
358
+ tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
359
+
360
+ # Find unique classes
361
+ unique_classes = np.unique(target_cls)
362
+
363
+ # Create Precision-Recall curve and compute AP for each class
364
+ px, py = np.linspace(0, 1, 1000), [] # for plotting
365
+ pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
366
+ s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
367
+ ap, p, r = np.zeros(s), np.zeros((unique_classes.shape[0], 1000)), np.zeros((unique_classes.shape[0], 1000))
368
+ for ci, c in enumerate(unique_classes):
369
+ i = pred_cls == c
370
+ n_l = (target_cls == c).sum() # number of labels
371
+ n_p = i.sum() # number of predictions
372
+
373
+ if n_p == 0 or n_l == 0:
374
+ continue
375
+ else:
376
+ # Accumulate FPs and TPs
377
+ fpc = (1 - tp[i]).cumsum(0)
378
+ tpc = tp[i].cumsum(0)
379
+
380
+ # Recall
381
+ recall = tpc / (n_l + 1e-16) # recall curve
382
+ r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases
383
+
384
+ # Precision
385
+ precision = tpc / (tpc + fpc) # precision curve
386
+ p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
387
+ # AP from recall-precision curve
388
+ for j in range(tp.shape[1]):
389
+ ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
390
+ if plot and (j == 0):
391
+ py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
392
+
393
+ # Compute F1 score (harmonic mean of precision and recall)
394
+ f1 = 2 * p * r / (p + r + 1e-16)
395
+ i=r.mean(0).argmax()
396
+
397
+ if plot:
398
+ plot_pr_curve(px, py, ap, save_dir, names)
399
+
400
+ return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32')
401
+
402
+ def compute_ap(recall, precision):
403
+ """ Compute the average precision, given the recall and precision curves.
404
+ Source: https://github.com/rbgirshick/py-faster-rcnn.
405
+ # Arguments
406
+ recall: The recall curve (list).
407
+ precision: The precision curve (list).
408
+ # Returns
409
+ The average precision as computed in py-faster-rcnn.
410
+ """
411
+
412
+ # Append sentinel values to beginning and end
413
+ mrec = np.concatenate(([0.], recall, [recall[-1] + 1E-3]))
414
+ mpre = np.concatenate(([1.], precision, [0.]))
415
+
416
+ # Compute the precision envelope
417
+ mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
418
+
419
+ # Integrate area under curve
420
+ method = 'interp' # methods: 'continuous', 'interp'
421
+ if method == 'interp':
422
+ x = np.linspace(0, 1, 101) # 101-point interp (COCO)
423
+ ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
424
+
425
+ else: # 'continuous'
426
+ i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
427
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
428
+
429
+ return ap, mpre, mrec
430
+
431
+ def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
432
+ # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
433
+ # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
434
+ # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
435
+ # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco
436
+ # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet
437
+ x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
438
+ 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
439
+ 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
440
+ return x
441
+
442
+ def output_to_target(output):
443
+ # Convert model output to target format [batch_id, class_id, x, y, w, h, conf]
444
+ targets = []
445
+ for i, o in enumerate(output):
446
+ for *box, conf, cls in o.cpu().numpy():
447
+ targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf])
448
+ return np.array(targets)
449
+
450
+ def plot_pr_curve(px, py, ap, save_dir='.', names=()):
451
+ fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
452
+ py = np.stack(py, axis=1)
453
+
454
+ if 0 < len(names) < 21: # show mAP in legend if < 21 classes
455
+ for i, y in enumerate(py.T):
456
+ ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, precision)
457
+ else:
458
+ ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision)
459
+
460
+ ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
461
+ ax.set_xlabel('Recall')
462
+ ax.set_ylabel('Precision')
463
+ ax.set_xlim(0, 1)
464
+ ax.set_ylim(0, 1)
465
+ plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
466
+ fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250)
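
To make the detection post-processing path concrete, here is a hedged sketch that runs non_max_suppression on a random dummy prediction tensor (1 image, 100 candidate boxes, 1 class); every value is illustrative and the import path simply follows this upload's layout:

    import torch
    from lib.core.general import non_max_suppression, xyxy2xywh

    # raw detection head output: [batch, n_boxes, 5 + nc] = [x, y, w, h, obj_conf, cls_conf, ...]
    pred = torch.rand(1, 100, 6)
    pred[..., :4] *= 640                   # pretend xywh is in pixels on a 640x640 image

    dets = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
    boxes = dets[0]                        # per-image tensor of shape (n_kept, 6)
    print(boxes.shape)                     # columns: x1, y1, x2, y2, conf, cls
    if len(boxes):
        print(xyxy2xywh(boxes[:, :4])[0])  # convert the first kept box back to xywh
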
lib/core/loss.py ADDED
@@ -0,0 +1,249 @@
1
+ import torch.nn as nn
2
+ import torch
3
+ from .general import bbox_iou
4
+ from .postprocess import build_targets
5
+ from lib.core.evaluate import SegmentationMetric
6
+
7
+ class MultiHeadLoss(nn.Module):
8
+ """
9
+ collect all the loss we need
10
+ """
11
+ def __init__(self, losses, cfg, lambdas=None):
12
+ """
13
+ Inputs:
14
+ - losses: (list)[nn.Module, nn.Module, ...]
15
+ - cfg: config object
16
+ - lambdas: (list) weights for each loss term [cls, obj, iou, da_seg, ll_seg, ll_iou]
17
+ """
18
+ super().__init__()
19
+ # lambdas: [cls, obj, iou, da_seg, ll_seg, ll_iou]
20
+ if not lambdas:
21
+ lambdas = [1.0 for _ in range(len(losses) + 3)]
22
+ assert all(lam >= 0.0 for lam in lambdas)
23
+
24
+ self.losses = nn.ModuleList(losses)
25
+ self.lambdas = lambdas
26
+ self.cfg = cfg
27
+
28
+ def forward(self, head_fields, head_targets, shapes, model):
29
+ """
30
+ Inputs:
31
+ - head_fields: (list) output from each task head
32
+ - head_targets: (list) ground-truth for each task head
33
+ - model:
34
+
35
+ Returns:
36
+ - total_loss: sum of all the loss
37
+ - head_losses: (tuple) contain all loss[loss1, loss2, ...]
38
+
39
+ """
40
+ # head_losses = [ll
41
+ # for l, f, t in zip(self.losses, head_fields, head_targets)
42
+ # for ll in l(f, t)]
43
+ #
44
+ # assert len(self.lambdas) == len(head_losses)
45
+ # loss_values = [lam * l
46
+ # for lam, l in zip(self.lambdas, head_losses)
47
+ # if l is not None]
48
+ # total_loss = sum(loss_values) if loss_values else None
49
+ # print(model.nc)
50
+ total_loss, head_losses = self._forward_impl(head_fields, head_targets, shapes, model)
51
+
52
+ return total_loss, head_losses
53
+
54
+ def _forward_impl(self, predictions, targets, shapes, model):
55
+ """
56
+
57
+ Args:
58
+ predictions: predicts of [[det_head1, det_head2, det_head3], drive_area_seg_head, lane_line_seg_head]
59
+ targets: gts [det_targets, segment_targets, lane_targets]
60
+ model:
61
+
62
+ Returns:
63
+ total_loss: sum of all the loss
64
+ head_losses: list containing losses
65
+
66
+ """
67
+ cfg = self.cfg
68
+ device = targets[0].device
69
+ lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
70
+ tcls, tbox, indices, anchors = build_targets(cfg, predictions[0], targets[0], model) # targets
71
+
72
+ # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
73
+ cp, cn = smooth_BCE(eps=0.0)
74
+
75
+ BCEcls, BCEobj, BCEseg = self.losses
76
+
77
+ # Calculate Losses
78
+ nt = 0 # number of targets
79
+ no = len(predictions[0]) # number of outputs
80
+ balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6
81
+
82
+ # calculate detection loss
83
+ for i, pi in enumerate(predictions[0]): # layer index, layer predictions
84
+ b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
85
+ tobj = torch.zeros_like(pi[..., 0], device=device) # target obj
86
+
87
+ n = b.shape[0] # number of targets
88
+ if n:
89
+ nt += n # cumulative targets
90
+ ps = pi[b, a, gj, gi] # prediction subset corresponding to targets
91
+
92
+ # Regression
93
+ pxy = ps[:, :2].sigmoid() * 2. - 0.5
94
+ pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
95
+ pbox = torch.cat((pxy, pwh), 1).to(device) # predicted box
96
+ iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target)
97
+ lbox += (1.0 - iou).mean() # iou loss
98
+
99
+ # Objectness
100
+ tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio
101
+
102
+ # Classification
103
+ # print(model.nc)
104
+ if model.nc > 1: # cls loss (only if multiple classes)
105
+ t = torch.full_like(ps[:, 5:], cn, device=device) # targets
106
+ t[range(n), tcls[i]] = cp
107
+ lcls += BCEcls(ps[:, 5:], t) # BCE
108
+ lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss
109
+
110
+ drive_area_seg_predicts = predictions[1].view(-1)
111
+ drive_area_seg_targets = targets[1].view(-1)
112
+ lseg_da = BCEseg(drive_area_seg_predicts, drive_area_seg_targets)
113
+
114
+ lane_line_seg_predicts = predictions[2].view(-1)
115
+ lane_line_seg_targets = targets[2].view(-1)
116
+ lseg_ll = BCEseg(lane_line_seg_predicts, lane_line_seg_targets)
117
+
118
+ metric = SegmentationMetric(2)
119
+ nb, _, height, width = targets[1].shape
120
+ pad_w, pad_h = shapes[0][1][1]
121
+ pad_w = int(pad_w)
122
+ pad_h = int(pad_h)
123
+ _,lane_line_pred=torch.max(predictions[2], 1)
124
+ _,lane_line_gt=torch.max(targets[2], 1)
125
+ lane_line_pred = lane_line_pred[:, pad_h:height-pad_h, pad_w:width-pad_w]
126
+ lane_line_gt = lane_line_gt[:, pad_h:height-pad_h, pad_w:width-pad_w]
127
+ metric.reset()
128
+ metric.addBatch(lane_line_pred.cpu(), lane_line_gt.cpu())
129
+ IoU = metric.IntersectionOverUnion()
130
+ liou_ll = 1 - IoU
131
+
132
+ s = 3 / no # output count scaling
133
+ lcls *= cfg.LOSS.CLS_GAIN * s * self.lambdas[0]
134
+ lobj *= cfg.LOSS.OBJ_GAIN * s * (1.4 if no == 4 else 1.) * self.lambdas[1]
135
+ lbox *= cfg.LOSS.BOX_GAIN * s * self.lambdas[2]
136
+
137
+ lseg_da *= cfg.LOSS.DA_SEG_GAIN * self.lambdas[3]
138
+ lseg_ll *= cfg.LOSS.LL_SEG_GAIN * self.lambdas[4]
139
+ liou_ll *= cfg.LOSS.LL_IOU_GAIN * self.lambdas[5]
140
+
141
+
142
+ if cfg.TRAIN.DET_ONLY or cfg.TRAIN.ENC_DET_ONLY:
143
+ lseg_da = 0 * lseg_da
144
+ lseg_ll = 0 * lseg_ll
145
+ liou_ll = 0 * liou_ll
146
+
147
+ if cfg.TRAIN.SEG_ONLY or cfg.TRAIN.ENC_SEG_ONLY:
148
+ lcls = 0 * lcls
149
+ lobj = 0 * lobj
150
+ lbox = 0 * lbox
151
+
152
+ if cfg.TRAIN.LANE_ONLY:
153
+ lcls = 0 * lcls
154
+ lobj = 0 * lobj
155
+ lbox = 0 * lbox
156
+ lseg_da = 0 * lseg_da
157
+
158
+ if cfg.TRAIN.DRIVABLE_ONLY:
159
+ lcls = 0 * lcls
160
+ lobj = 0 * lobj
161
+ lbox = 0 * lbox
162
+ lseg_ll = 0 * lseg_ll
163
+ liou_ll = 0 * liou_ll
164
+
165
+ loss = lbox + lobj + lcls + lseg_da + lseg_ll + liou_ll
166
+ # loss = lseg
167
+ # return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()
168
+
169
+ # return a detailed loss dictionary so each term can be logged to TensorBoard
170
+ loss_dict = {
171
+ 'box_loss': lbox.item(),
172
+ 'obj_loss': lobj.item(),
173
+ 'cls_loss': lcls.item(),
174
+ 'da_seg_loss': lseg_da.item(),
175
+ 'll_seg_loss': lseg_ll.item(),
176
+ 'll_iou_loss': liou_ll.item(),
177
+ 'total_loss': loss.item()
178
+ }
179
+
180
+ return loss, loss_dict
181
+
182
+
183
+ def get_loss(cfg, device):
184
+ """
185
+ get MultiHeadLoss
186
+
187
+ Inputs:
188
+ -cfg: configuration; the loss-related settings are used
189
+ (e.g. gains, positive weights, focal-loss gamma)
190
+ -device: cpu or gpu device
191
+
192
+ Returns:
193
+ -loss: (MultiHeadLoss)
194
+
195
+ """
196
+ # class loss criteria
197
+ BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([cfg.LOSS.CLS_POS_WEIGHT])).to(device)
198
+ # object loss criteria
199
+ BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([cfg.LOSS.OBJ_POS_WEIGHT])).to(device)
200
+ # segmentation loss criteria
201
+ BCEseg = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([cfg.LOSS.SEG_POS_WEIGHT])).to(device)
202
+ # Focal loss
203
+ gamma = cfg.LOSS.FL_GAMMA # focal loss gamma
204
+ if gamma > 0:
205
+ BCEcls, BCEobj = FocalLoss(BCEcls, gamma), FocalLoss(BCEobj, gamma)
206
+
207
+ loss_list = [BCEcls, BCEobj, BCEseg]
208
+ loss = MultiHeadLoss(loss_list, cfg=cfg, lambdas=cfg.LOSS.MULTI_HEAD_LAMBDA)
209
+ return loss
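A minimal, hedged sketch of how this factory is wired into a training step. The config import and the forward-call signature (predictions, targets, shapes, model) are inferred from the surrounding code; predictions, targets, shapes and model are placeholders produced by the model and dataloader at train time.

import torch
from lib.config import cfg              # assumed: repo-wide config object
from lib.core.loss import get_loss      # factory defined above

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = get_loss(cfg, device)       # MultiHeadLoss with BCE (optionally focal) terms

# predictions = model(inputs)  -> [det_outputs, da_seg_out, ll_seg_out]
# targets, shapes come from the dataloader batch (see AutoDriveDataset below)
total_loss, loss_dict = criterion(predictions, targets, shapes, model)
total_loss.backward()
print({k: round(v, 4) for k, v in loss_dict.items()})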
210
+
211
+ # example
212
+ # class L1_Loss(nn.Module)
213
+
214
+
215
+ def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
216
+ # return positive, negative label smoothing BCE targets
217
+ return 1.0 - 0.5 * eps, 0.5 * eps
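For reference, the smoothing simply moves the positive/negative BCE targets towards each other; with the eps=0.0 used in the forward pass above nothing changes:

>>> smooth_BCE(eps=0.1)
(0.95, 0.05)
>>> smooth_BCE(eps=0.0)   # value used in MultiHeadLoss above
(1.0, 0.0)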
218
+
219
+
220
+ class FocalLoss(nn.Module):
221
+ # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
222
+ def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
223
+ # alpha balance positive & negative samples
224
+ # gamma focus on difficult samples
225
+ super(FocalLoss, self).__init__()
226
+ self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
227
+ self.gamma = gamma
228
+ self.alpha = alpha
229
+ self.reduction = loss_fcn.reduction
230
+ self.loss_fcn.reduction = 'none' # required to apply FL to each element
231
+
232
+ def forward(self, pred, true):
233
+ loss = self.loss_fcn(pred, true)
234
+ # p_t = torch.exp(-loss)
235
+ # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
236
+
237
+ # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
238
+ pred_prob = torch.sigmoid(pred) # prob from logits
239
+ p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
240
+ alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
241
+ modulating_factor = (1.0 - p_t) ** self.gamma
242
+ loss *= alpha_factor * modulating_factor
243
+
244
+ if self.reduction == 'mean':
245
+ return loss.mean()
246
+ elif self.reduction == 'sum':
247
+ return loss.sum()
248
+ else: # 'none'
249
+ return loss
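A small self-contained check of the wrapper above on synthetic logits; only FocalLoss itself comes from this file, the tensors are made up:

import torch
import torch.nn as nn
from lib.core.loss import FocalLoss

logits = torch.randn(8, 4)                      # synthetic predictions
labels = torch.randint(0, 2, (8, 4)).float()    # synthetic binary targets

plain = nn.BCEWithLogitsLoss()(logits, labels)
focal = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5, alpha=0.25)
print(plain.item(), focal(logits, labels).item())   # the focal term down-weights easy examples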
lib/core/postprocess.py ADDED
@@ -0,0 +1,225 @@
1
+ import torch
2
+ from lib.utils import is_parallel
3
+ import numpy as np
4
+ np.set_printoptions(threshold=np.inf)
5
+ import cv2
6
+ from sklearn.cluster import DBSCAN
7
+
8
+
9
+ def build_targets(cfg, predictions, targets, model):
10
+ '''
11
+ predictions
12
+ [16, 3, 32, 32, 85]
13
+ [16, 3, 16, 16, 85]
14
+ [16, 3, 8, 8, 85]
15
+ torch.tensor(predictions[i].shape)[[3, 2, 3, 2]]
16
+ [32,32,32,32]
17
+ [16,16,16,16]
18
+ [8,8,8,8]
19
+ targets[3,x,7]
20
+ t [index, class, x, y, w, h, head_index]
21
+ '''
22
+ # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
23
+ det = model.module.model[model.module.detector_index] if is_parallel(model) \
24
+ else model.model[model.detector_index] # Detect() module
25
+ # print(type(model))
26
+ # det = model.model[model.detector_index]
27
+ # print(type(det))
28
+ na, nt = det.na, targets.shape[0] # number of anchors, targets
29
+ tcls, tbox, indices, anch = [], [], [], []
30
+ gain = torch.ones(7, device=targets.device) # normalized to gridspace gain
31
+ ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
32
+ targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices
33
+
34
+ g = 0.5 # bias
35
+ off = torch.tensor([[0, 0],
36
+ [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m
37
+ # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
38
+ ], device=targets.device).float() * g # offsets
39
+
40
+ for i in range(det.nl):
41
+ anchors = det.anchors[i] #[3,2]
42
+ gain[2:6] = torch.tensor(predictions[i].shape)[[3, 2, 3, 2]] # xyxy gain
43
+ # Match targets to anchors
44
+ t = targets * gain
45
+
46
+ if nt:
47
+ # Matches
48
+ r = t[:, :, 4:6] / anchors[:, None] # wh ratio
49
+ j = torch.max(r, 1. / r).max(2)[0] < cfg.TRAIN.ANCHOR_THRESHOLD # compare
50
+ # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
51
+ t = t[j] # filter
52
+
53
+ # Offsets
54
+ gxy = t[:, 2:4] # grid xy
55
+ gxi = gain[[2, 3]] - gxy # inverse
56
+ j, k = ((gxy % 1. < g) & (gxy > 1.)).T
57
+ l, m = ((gxi % 1. < g) & (gxi > 1.)).T
58
+ j = torch.stack((torch.ones_like(j), j, k, l, m))
59
+ t = t.repeat((5, 1, 1))[j]
60
+ offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
61
+ else:
62
+ t = targets[0]
63
+ offsets = 0
64
+
65
+ # Define
66
+ b, c = t[:, :2].long().T # image, class
67
+ gxy = t[:, 2:4] # grid xy
68
+ gwh = t[:, 4:6] # grid wh
69
+ gij = (gxy - offsets).long()
70
+ gi, gj = gij.T # grid xy indices
71
+
72
+ # Append
73
+ a = t[:, 6].long() # anchor indices
74
+ indices.append((b, a, gj.clamp_(0, int(gain[3]) - 1), gi.clamp_(0, int(gain[2]) - 1))) # image, anchor, grid indices
75
+ tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
76
+ anch.append(anchors[a]) # anchors
77
+ tcls.append(c) # class
78
+
79
+ return tcls, tbox, indices, anch
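The anchor-matching rule at the heart of build_targets can be reproduced in isolation: a target is assigned to an anchor when the largest width/height ratio between them stays below ANCHOR_THRESHOLD. The threshold value 4.0 below is only an illustrative assumption and the tensors are synthetic:

import torch

anchors = torch.tensor([[10., 13.], [33., 23.], [62., 45.]])   # (na, 2) anchor sizes in grid units
t_wh = torch.tensor([[12., 14.], [150., 40.]])                 # (nt, 2) target sizes in grid units
thr = 4.0                                                      # stands in for cfg.TRAIN.ANCHOR_THRESHOLD

r = t_wh[None, :, :] / anchors[:, None, :]        # wh ratio, shape (na, nt, 2)
keep = torch.max(r, 1. / r).max(2)[0] < thr       # (na, nt) bool: which anchor keeps which target
print(keep)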
80
+
81
+ def morphological_process(image, kernel_size=5, func_type=cv2.MORPH_CLOSE):
82
+ """
83
+ morphological processing to fill holes in the binary segmentation result
85
+ :param image: single-channel binary segmentation mask
86
+ :param kernel_size: size of the elliptical structuring element
87
+ :param func_type: morphological operation type (default cv2.MORPH_CLOSE)
+ :return: the processed mask
87
+ """
88
+ if len(image.shape) == 3:
89
+ raise ValueError('Binary segmentation result image should be a single channel image')
90
+
91
+ if image.dtype != np.uint8:
92
+ image = np.array(image, np.uint8)
93
+
94
+ kernel = cv2.getStructuringElement(shape=cv2.MORPH_ELLIPSE, ksize=(kernel_size, kernel_size))
95
+
96
+ # closing operation fills small holes
97
+ closing = cv2.morphologyEx(image, func_type, kernel, iterations=1)
98
+
99
+ return closing
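A quick, self-contained check of the closing step on a synthetic mask with a small hole; only morphological_process is taken from this module:

import cv2
import numpy as np
from lib.core.postprocess import morphological_process

mask = np.zeros((64, 64), dtype=np.uint8)
cv2.rectangle(mask, (10, 10), (50, 50), 255, -1)   # filled square blob
mask[30:33, 30:33] = 0                             # punch a small 3x3 hole

closed = morphological_process(mask, kernel_size=5, func_type=cv2.MORPH_CLOSE)
print(int((mask == 0)[10:51, 10:51].sum()), int((closed == 0)[10:51, 10:51].sum()))   # 9 -> 0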
100
+
101
+ def connect_components_analysis(image):
102
+ """
103
+ connected components analysis of the binary segmentation result
104
+ :param image: binary segmentation mask (single- or three-channel)
105
+ :return: output of cv2.connectedComponentsWithStats
106
+ """
107
+ if len(image.shape) == 3:
108
+ gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
109
+ else:
110
+ gray_image = image
111
+ # print(gray_image.dtype)
112
+ return cv2.connectedComponentsWithStats(gray_image, connectivity=8, ltype=cv2.CV_32S)
113
+
114
+ def if_y(samples_x):
115
+ for sample_x in samples_x:
116
+ if len(sample_x):
117
+ # if len(sample_x) != (sample_x[-1] - sample_x[0] + 1) or sample_x[-1] == sample_x[0]:
118
+ if sample_x[-1] == sample_x[0]:
119
+ return False
120
+ return True
121
+
122
+ def fitlane(mask, sel_labels, labels, stats):
123
+ H, W = mask.shape
124
+ for label_group in sel_labels:
125
+ states = [stats[k] for k in label_group]
126
+ x, y, w, h, _ = states[0]
127
+ # if len(label_group) > 1:
128
+ # print('in')
129
+ # for m in range(len(label_group)-1):
130
+ # labels[labels == label_group[m+1]] = label_group[0]
131
+ t = label_group[0]
132
+ # samples_y = np.linspace(y, H-1, 30)
133
+ # else:
134
+ samples_y = np.linspace(y, y+h-1, 30)
135
+
136
+ samples_x = [np.where(labels[int(sample_y)]==t)[0] for sample_y in samples_y]
137
+
138
+ if if_y(samples_x):
139
+ samples_x = [int(np.mean(sample_x)) if len(sample_x) else -1 for sample_x in samples_x]
140
+ samples_x = np.array(samples_x)
141
+ samples_y = np.array(samples_y)
142
+ samples_y = samples_y[samples_x != -1]
143
+ samples_x = samples_x[samples_x != -1]
144
+ func = np.polyfit(samples_y, samples_x, 2)
145
+ x_limits = np.polyval(func, H-1)
146
+ # if (y_max + h - 1) >= 720:
147
+ if x_limits < 0 or x_limits > W:
148
+ # if (y_max + h - 1) > 720:
149
+ # draw_y = np.linspace(y, 720-1, 720-y)
150
+ draw_y = np.linspace(y, y+h-1, h)
151
+ else:
152
+ # draw_y = np.linspace(y, y+h-1, y+h-y)
153
+ draw_y = np.linspace(y, H-1, H-y)
154
+ draw_x = np.polyval(func, draw_y)
155
+ # draw_y = draw_y[draw_x < W]
156
+ # draw_x = draw_x[draw_x < W]
157
+ draw_points = (np.asarray([draw_x, draw_y]).T).astype(np.int32)
158
+ cv2.polylines(mask, [draw_points], False, 1, thickness=15)
159
+ else:
160
+ # if ( + w - 1) >= 1280:
161
+ samples_x = np.linspace(x, W-1, 30)
162
+ # else:
163
+ # samples_x = np.linspace(x, x_max+w-1, 30)
164
+ samples_y = [np.where(labels[:, int(sample_x)]==t)[0] for sample_x in samples_x]
165
+ samples_y = [int(np.mean(sample_y)) if len(sample_y) else -1 for sample_y in samples_y]
166
+ samples_x = np.array(samples_x)
167
+ samples_y = np.array(samples_y)
168
+ samples_x = samples_x[samples_y != -1]
169
+ samples_y = samples_y[samples_y != -1]
170
+ try:
171
+ func = np.polyfit(samples_x, samples_y, 2)
172
+ except Exception:
173
+ continue  # skip this component if the polynomial fit fails
174
+ # y_limits = np.polyval(func, 0)
175
+ # if y_limits > 720 or y_limits < 0:
176
+ # if (x + w - 1) >= 1280:
177
+ # draw_x = np.linspace(x, 1280-1, 1280-x)
178
+ # else:
179
+ y_limits = np.polyval(func, 0)
180
+ if y_limits >= H or y_limits < 0:
181
+ draw_x = np.linspace(x, x+w-1, w)
182
+ else:
183
+ y_limits = np.polyval(func, W-1)
184
+ if y_limits >= H or y_limits < 0:
185
+ draw_x = np.linspace(x, x+w-1, w)
186
+ # if x+w-1 < 640:
187
+ # draw_x = np.linspace(0, x+w-1, w+x-x)
188
+ else:
189
+ draw_x = np.linspace(x, W-1, W-x)
190
+ draw_y = np.polyval(func, draw_x)
191
+ draw_points = (np.asarray([draw_x, draw_y]).T).astype(np.int32)
192
+ cv2.polylines(mask, [draw_points], False, 1, thickness=15)
193
+ return mask
194
+
195
+ def connect_lane(image, shadow_height=0):
196
+ if len(image.shape) == 3:
197
+ gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
198
+ else:
199
+ gray_image = image
200
+ if shadow_height:
201
+ image[:shadow_height] = 0
202
+ mask = np.zeros((image.shape[0], image.shape[1]), np.uint8)
203
+
204
+ num_labels, labels, stats, centers = cv2.connectedComponentsWithStats(gray_image, connectivity=8, ltype=cv2.CV_32S)
205
+ # ratios = []
206
+ selected_label = []
207
+
208
+ for t in range(1, num_labels, 1):
209
+ _, _, _, _, area = stats[t]
210
+ if area > 400:
211
+ selected_label.append(t)
212
+ if len(selected_label) == 0:
213
+ return mask
214
+ else:
215
+ split_labels = [[label,] for label in selected_label]
216
+ mask_post = fitlane(mask, split_labels, labels, stats)
217
+ return mask_post
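A hedged end-to-end sketch of the lane post-processing on a synthetic binary lane mask (components smaller than 400 px are discarded by connect_lane, so the drawn line is made thick enough to survive):

import cv2
import numpy as np
from lib.core.postprocess import connect_lane, morphological_process

ll_mask = np.zeros((256, 256), dtype=np.uint8)
cv2.line(ll_mask, (40, 255), (120, 0), 255, thickness=5)   # one slanted "lane" blob, well above 400 px

ll_mask = morphological_process(ll_mask, kernel_size=5, func_type=cv2.MORPH_CLOSE)
fitted = connect_lane(ll_mask, shadow_height=0)
print(fitted.shape, int(fitted.sum()))   # fitted polyline re-drawn with value 1 on a fresh mask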
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
lib/dataset/AutoDriveDataset.py ADDED
@@ -0,0 +1,264 @@
1
+ import cv2
2
+ import numpy as np
3
+ # np.set_printoptions(threshold=np.inf)
4
+ import random
5
+ import torch
6
+ import torchvision.transforms as transforms
7
+ # from visualization import plot_img_and_mask,plot_one_box,show_seg_result
8
+ from pathlib import Path
9
+ from PIL import Image
10
+ from torch.utils.data import Dataset
11
+ from ..utils import letterbox, augment_hsv, random_perspective, xyxy2xywh, cutout
12
+
13
+
14
+ class AutoDriveDataset(Dataset):
15
+ """
16
+ A general Dataset providing functionality shared by the concrete datasets
17
+ """
18
+ def __init__(self, cfg, is_train, inputsize=640, transform=None):
19
+ """
20
+ initialize all the characteristics
21
+
22
+ Inputs:
23
+ -cfg: configurations
24
+ -is_train(bool): whether train set or not
25
+ -transform: ToTensor and Normalize
26
+
27
+ Returns:
28
+ None
29
+ """
30
+ self.is_train = is_train
31
+ self.cfg = cfg
32
+ self.transform = transform
33
+ self.inputsize = inputsize
34
+ self.Tensor = transforms.ToTensor()
35
+ img_root = Path(cfg.DATASET.DATAROOT)
36
+ label_root = Path(cfg.DATASET.LABELROOT)
37
+ mask_root = Path(cfg.DATASET.MASKROOT)
38
+ lane_root = Path(cfg.DATASET.LANEROOT)
39
+ if is_train:
40
+ indicator = cfg.DATASET.TRAIN_SET
41
+ else:
42
+ indicator = cfg.DATASET.TEST_SET
43
+ self.img_root = img_root / indicator
44
+ self.label_root = label_root / indicator
45
+ self.mask_root = mask_root / indicator
46
+ self.lane_root = lane_root / indicator
47
+ # self.label_list = self.label_root.iterdir()
48
+ self.mask_list = self.mask_root.iterdir()
49
+
50
+ self.db = []
51
+
52
+ self.data_format = cfg.DATASET.DATA_FORMAT
53
+
54
+ self.scale_factor = cfg.DATASET.SCALE_FACTOR
55
+ self.rotation_factor = cfg.DATASET.ROT_FACTOR
56
+ self.flip = cfg.DATASET.FLIP
57
+ self.color_rgb = cfg.DATASET.COLOR_RGB
58
+
59
+ # self.target_type = cfg.MODEL.TARGET_TYPE
60
+ self.shapes = np.array(cfg.DATASET.ORG_IMG_SIZE)
61
+
62
+ def _get_db(self):
63
+ """
64
+ implemented in the child Dataset (for a dataset that is not in BDD100K format, override this in the child Dataset)
65
+ """
66
+ raise NotImplementedError
67
+
68
+ def evaluate(self, cfg, preds, output_dir):
69
+ """
70
+ implemented in the child dataset
71
+ """
72
+ raise NotImplementedError
73
+
74
+ def __len__(self,):
75
+ """
76
+ number of objects in the dataset
77
+ """
78
+ return len(self.db)
79
+
80
+ def __getitem__(self, idx):
81
+ """
82
+ Get input and ground-truth from the database & apply data augmentation to the input
83
+
84
+ Inputs:
85
+ -idx: the index of image in self.db(database)(list)
86
+ self.db(list) [a,b,c,...]
87
+ a: (dictionary){'image':, 'information':}
88
+
89
+ Returns:
90
+ -image: transformed image; data augmentation is applied first in __getitem__ (numpy array), then self.transform
91
+ -target: ground truth (det_gt, seg_gt, lane_gt)
92
+
93
+ functions that may be useful:
94
+ cv2.imread
95
+ cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
96
+ cv2.warpAffine
97
+ """
98
+ data = self.db[idx]
99
+ img = cv2.imread(data["image"], cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
100
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
101
+ # seg_label = cv2.imread(data["mask"], 0)
102
+ if self.cfg.num_seg_class == 3:
103
+ seg_label = cv2.imread(data["mask"])
104
+ else:
105
+ seg_label = cv2.imread(data["mask"], 0)
106
+ lane_label = cv2.imread(data["lane"], 0)
107
+ #print(lane_label.shape)
108
+ # print(seg_label.shape)
109
+ # print(lane_label.shape)
110
+ # print(seg_label.shape)
111
+ resized_shape = self.inputsize
112
+ if isinstance(resized_shape, list):
113
+ resized_shape = max(resized_shape)
114
+ h0, w0 = img.shape[:2] # orig hw
115
+ r = resized_shape / max(h0, w0) # resize image to img_size
116
+ if r != 1: # always resize down, only resize up if training with augmentation
117
+ interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
118
+ img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
119
+ seg_label = cv2.resize(seg_label, (int(w0 * r), int(h0 * r)), interpolation=interp)
120
+ lane_label = cv2.resize(lane_label, (int(w0 * r), int(h0 * r)), interpolation=interp)
121
+ h, w = img.shape[:2]
122
+
123
+ (img, seg_label, lane_label), ratio, pad = letterbox((img, seg_label, lane_label), resized_shape, auto=True, scaleup=self.is_train)
124
+ shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
125
+ # ratio = (w / w0, h / h0)
126
+ # print(resized_shape)
127
+
128
+ det_label = data["label"]
129
+ labels=[]
130
+
131
+ if det_label.size > 0:
132
+ # Normalized xywh to pixel xyxy format
133
+ labels = det_label.copy()
134
+ labels[:, 1] = ratio[0] * w * (det_label[:, 1] - det_label[:, 3] / 2) + pad[0] # pad width
135
+ labels[:, 2] = ratio[1] * h * (det_label[:, 2] - det_label[:, 4] / 2) + pad[1] # pad height
136
+ labels[:, 3] = ratio[0] * w * (det_label[:, 1] + det_label[:, 3] / 2) + pad[0]
137
+ labels[:, 4] = ratio[1] * h * (det_label[:, 2] + det_label[:, 4] / 2) + pad[1]
138
+
139
+ if self.is_train:
140
+ combination = (img, seg_label, lane_label)
141
+ (img, seg_label, lane_label), labels = random_perspective(
142
+ combination=combination,
143
+ targets=labels,
144
+ degrees=self.cfg.DATASET.ROT_FACTOR,
145
+ translate=self.cfg.DATASET.TRANSLATE,
146
+ scale=self.cfg.DATASET.SCALE_FACTOR,
147
+ shear=self.cfg.DATASET.SHEAR
148
+ )
149
+ #print(labels.shape)
150
+ augment_hsv(img, hgain=self.cfg.DATASET.HSV_H, sgain=self.cfg.DATASET.HSV_S, vgain=self.cfg.DATASET.HSV_V)
151
+ # img, seg_label, labels = cutout(combination=combination, labels=labels)
152
+
153
+ if len(labels):
154
+ # convert xyxy to xywh
155
+ labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
156
+
157
+ # Normalize coordinates 0 - 1
158
+ labels[:, [2, 4]] /= img.shape[0] # height
159
+ labels[:, [1, 3]] /= img.shape[1] # width
160
+
161
+ # if self.is_train:
162
+ # random left-right flip
163
+ lr_flip = True
164
+ if lr_flip and random.random() < 0.5:
165
+ img = np.fliplr(img)
166
+ seg_label = np.fliplr(seg_label)
167
+ lane_label = np.fliplr(lane_label)
168
+ if len(labels):
169
+ labels[:, 1] = 1 - labels[:, 1]
170
+
171
+ # random up-down flip
172
+ ud_flip = False
173
+ if ud_flip and random.random() < 0.5:
174
+ img = np.flipud(img)
175
+ seg_label = np.flipud(seg_label)
176
+ lane_label = np.flipud(lane_label)
177
+ if len(labels):
178
+ labels[:, 2] = 1 - labels[:, 2]
179
+
180
+ else:
181
+ if len(labels):
182
+ # convert xyxy to xywh
183
+ labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
184
+
185
+ # Normalize coordinates 0 - 1
186
+ labels[:, [2, 4]] /= img.shape[0] # height
187
+ labels[:, [1, 3]] /= img.shape[1] # width
188
+
189
+ labels_out = torch.zeros((len(labels), 6))
190
+ if len(labels):
191
+ labels_out[:, 1:] = torch.from_numpy(labels)
192
+ # Convert
193
+ # img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
194
+ # img = img.transpose(2, 0, 1)
195
+ img = np.ascontiguousarray(img)
196
+ # seg_label = np.ascontiguousarray(seg_label)
197
+ # if idx == 0:
198
+ # print(seg_label[:,:,0])
199
+
200
+ if self.cfg.num_seg_class == 3:
201
+ _,seg0 = cv2.threshold(seg_label[:,:,0],128,255,cv2.THRESH_BINARY)
202
+ _,seg1 = cv2.threshold(seg_label[:,:,1],1,255,cv2.THRESH_BINARY)
203
+ _,seg2 = cv2.threshold(seg_label[:,:,2],1,255,cv2.THRESH_BINARY)
204
+ else:
205
+ _,seg1 = cv2.threshold(seg_label,1,255,cv2.THRESH_BINARY)
206
+ _,seg2 = cv2.threshold(seg_label,1,255,cv2.THRESH_BINARY_INV)
207
+ _,lane1 = cv2.threshold(lane_label,1,255,cv2.THRESH_BINARY)
208
+ _,lane2 = cv2.threshold(lane_label,1,255,cv2.THRESH_BINARY_INV)
209
+ # _,seg2 = cv2.threshold(seg_label[:,:,2],1,255,cv2.THRESH_BINARY)
210
+ # # seg1[cutout_mask] = 0
211
+ # # seg2[cutout_mask] = 0
212
+
213
+ # seg_label /= 255
214
+ # seg0 = self.Tensor(seg0)
215
+ if self.cfg.num_seg_class == 3:
216
+ seg0 = self.Tensor(seg0)
217
+ seg1 = self.Tensor(seg1)
218
+ seg2 = self.Tensor(seg2)
219
+ # seg1 = self.Tensor(seg1)
220
+ # seg2 = self.Tensor(seg2)
221
+ lane1 = self.Tensor(lane1)
222
+ lane2 = self.Tensor(lane2)
223
+
224
+ # seg_label = torch.stack((seg2[0], seg1[0]),0)
225
+ if self.cfg.num_seg_class == 3:
226
+ seg_label = torch.stack((seg0[0],seg1[0],seg2[0]),0)
227
+ else:
228
+ seg_label = torch.stack((seg2[0], seg1[0]),0)
229
+
230
+ lane_label = torch.stack((lane2[0], lane1[0]),0)
231
+ # _, gt_mask = torch.max(seg_label, 0)
232
+ # _ = show_seg_result(img, gt_mask, idx, 0, save_dir='debug', is_gt=True)
233
+
234
+
235
+ target = [labels_out, seg_label, lane_label]
236
+ img = self.transform(img)
237
+
238
+ return img, target, data["image"], shapes
239
+
240
+ def select_data(self, db):
241
+ """
242
+ You can use this function to filter useless images in the dataset
243
+
244
+ Inputs:
245
+ -db: (list)database
246
+
247
+ Returns:
248
+ -db_selected: (list)filtered dataset
249
+ """
250
+ db_selected = ...
251
+ return db_selected
252
+
253
+ @staticmethod
254
+ def collate_fn(batch):
255
+ img, label, paths, shapes= zip(*batch)
256
+ label_det, label_seg, label_lane = [], [], []
257
+ for i, l in enumerate(label):
258
+ l_det, l_seg, l_lane = l
259
+ l_det[:, 0] = i # add target image index for build_targets()
260
+ label_det.append(l_det)
261
+ label_seg.append(l_seg)
262
+ label_lane.append(l_lane)
263
+ return torch.stack(img, 0), [torch.cat(label_det, 0), torch.stack(label_seg, 0), torch.stack(label_lane, 0)], paths, shapes
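A hedged sketch of how this dataset is consumed during training. BddDataset and collate_fn come from this package; the cfg import, the exact cfg fields (e.g. cfg.MODEL.IMAGE_SIZE) and the normalization constants are assumptions borrowed from typical YOLOP-style training scripts:

import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from lib.config import cfg                              # assumed config object
from lib.dataset import BddDataset, AutoDriveDataset

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_set = BddDataset(cfg, is_train=True, inputsize=cfg.MODEL.IMAGE_SIZE,
                       transform=transforms.Compose([transforms.ToTensor(), normalize]))
loader = DataLoader(train_set, batch_size=8, shuffle=True,
                    collate_fn=AutoDriveDataset.collate_fn)

imgs, (det, da_seg, ll_seg), paths, shapes = next(iter(loader))
print(imgs.shape, det.shape, da_seg.shape, ll_seg.shape)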
264
+
lib/dataset/DemoDataset.py ADDED
@@ -0,0 +1,188 @@
1
+ import glob
2
+ import os
3
+ import random
4
+ import shutil
5
+ import time
6
+ from pathlib import Path
7
+ from threading import Thread
8
+
9
+ import cv2
10
+ import math
11
+ import numpy as np
12
+ import torch
13
+ from PIL import Image, ExifTags
14
+ from torch.utils.data import Dataset
15
+ from tqdm import tqdm
16
+
17
+ from ..utils import letterbox_for_img, clean_str
18
+
19
+ img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
20
+ vid_formats = ['.mov', '.avi', '.mp4', '.mpg', '.mpeg', '.m4v', '.wmv', '.mkv']
21
+
22
+ class LoadImages: # for inference
23
+ def __init__(self, path, img_size=640):
24
+ p = str(Path(path)) # os-agnostic
25
+ p = os.path.abspath(p) # absolute path
26
+ if '*' in p:
27
+ files = sorted(glob.glob(p, recursive=True)) # glob
28
+ elif os.path.isdir(p):
29
+ files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
30
+ elif os.path.isfile(p):
31
+ files = [p] # files
32
+ else:
33
+ raise Exception('ERROR: %s does not exist' % p)
34
+
35
+ images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
36
+ videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
37
+ ni, nv = len(images), len(videos)
38
+
39
+ self.img_size = img_size
40
+ self.files = images + videos
41
+ self.nf = ni + nv # number of files
42
+ self.video_flag = [False] * ni + [True] * nv
43
+ self.mode = 'images'
44
+ if any(videos):
45
+ self.new_video(videos[0]) # new video
46
+ else:
47
+ self.cap = None
48
+ assert self.nf > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
49
+ (p, img_formats, vid_formats)
50
+
51
+ def __iter__(self):
52
+ self.count = 0
53
+ return self
54
+
55
+ def __next__(self):
56
+ if self.count == self.nf:
57
+ raise StopIteration
58
+ path = self.files[self.count]
59
+
60
+ if self.video_flag[self.count]:
61
+ # Read video
62
+ self.mode = 'video'
63
+ ret_val, img0 = self.cap.read()
64
+ if not ret_val:
65
+ self.count += 1
66
+ self.cap.release()
67
+ if self.count == self.nf: # last video
68
+ raise StopIteration
69
+ else:
70
+ path = self.files[self.count]
71
+ self.new_video(path)
72
+ ret_val, img0 = self.cap.read()
73
+ h0, w0 = img0.shape[:2]
74
+
75
+ self.frame += 1
76
+ print('\n video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='')
77
+
78
+ else:
79
+ # Read image
80
+ self.count += 1
81
+ img0 = cv2.imread(path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) # BGR
82
+ #img0 = cv2.cvtColor(img0, cv2.COLOR_BGR2RGB)
83
+ assert img0 is not None, 'Image Not Found ' + path
84
+ print('image %g/%g %s: \n' % (self.count, self.nf, path), end='')
85
+ h0, w0 = img0.shape[:2]
86
+
87
+ # Padded resize
88
+ img, ratio, pad = letterbox_for_img(img0, new_shape=self.img_size, auto=True)
89
+ h, w = img.shape[:2]
90
+ shapes = (h0, w0), ((h / h0, w / w0), pad)
91
+
92
+ # Convert
93
+ #img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
94
+ img = np.ascontiguousarray(img)
95
+
96
+
97
+ # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image
98
+ return path, img, img0, self.cap, shapes
99
+
100
+ def new_video(self, path):
101
+ self.frame = 0
102
+ self.cap = cv2.VideoCapture(path)
103
+ self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
104
+
105
+ def __len__(self):
106
+ return self.nf # number of files
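A minimal sketch of driving the demo loader; the source directory is a placeholder and the model call is only indicated in a comment:

import torch
from lib.dataset import LoadImages

dataset = LoadImages('inference/images', img_size=640)    # placeholder folder of .jpg/.png files
for path, img, img0, vid_cap, shapes in dataset:
    img = torch.from_numpy(img).permute(2, 0, 1).float() / 255.0   # HWC uint8 -> CHW float in [0, 1]
    img = img.unsqueeze(0)                                          # add a batch dimension
    # det_out, da_seg_out, ll_seg_out = model(img)                  # model inference omitted here
    print(path, tuple(img.shape), shapes)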
107
+
108
+
109
+
110
+ class LoadStreams: # multiple IP or RTSP cameras
111
+ def __init__(self, sources='streams.txt', img_size=640, auto=True):
112
+ self.mode = 'stream'
113
+ self.img_size = img_size
114
+
115
+ if os.path.isfile(sources):
116
+ with open(sources, 'r') as f:
117
+ sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
118
+ else:
119
+ sources = [sources]
120
+
121
+ n = len(sources)
122
+ self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
123
+ self.sources = [clean_str(x) for x in sources] # clean source names for later
124
+ self.auto = auto
125
+ for i, s in enumerate(sources): # index, source
126
+ # Start thread to read frames from video stream
127
+ print(f'{i + 1}/{n}: {s}... ', end='')
128
+ s = int(s) if s.isnumeric() else s # i.e. s = '0' local webcam
129
+ cap = cv2.VideoCapture(s)
130
+ assert cap.isOpened(), f'Failed to open {s}'
131
+ w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
132
+ h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
133
+ self.fps[i] = max(cap.get(cv2.CAP_PROP_FPS) % 100, 0) or 30.0 # 30 FPS fallback
134
+ self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf') # infinite stream fallback
135
+
136
+ _, self.imgs[i] = cap.read() # guarantee first frame
137
+ self.threads[i] = Thread(target=self.update, args=([i, cap]), daemon=True)
138
+ print(f" success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
139
+ self.threads[i].start()
140
+ print('') # newline
141
+
142
+ # check for common shapes
143
+
144
+ s = np.stack([letterbox_for_img(x, self.img_size, auto=self.auto)[0].shape for x in self.imgs], 0) # shapes
145
+ self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
146
+ if not self.rect:
147
+ print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
148
+
149
+ def update(self, i, cap):
150
+ # Read stream `i` frames in daemon thread
151
+ n, f, read = 0, self.frames[i], 1 # frame number, frame array, inference every 'read' frame
152
+ while cap.isOpened() and n < f:
153
+ n += 1
154
+ # _, self.imgs[index] = cap.read()
155
+ cap.grab()
156
+ if n % read == 0:
157
+ success, im = cap.retrieve()
158
+ self.imgs[i] = im if success else self.imgs[i] * 0
159
+ time.sleep(1 / self.fps[i]) # wait time
160
+
161
+ def __iter__(self):
162
+ self.count = -1
163
+ return self
164
+
165
+ def __next__(self):
166
+ self.count += 1
167
+ if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'): # q to quit
168
+ cv2.destroyAllWindows()
169
+ raise StopIteration
170
+
171
+ # Letterbox
172
+ img0 = self.imgs.copy()
173
+
174
+ h0, w0 = img0[0].shape[:2]
175
+ img, _, pad = letterbox_for_img(img0[0], self.img_size, auto=self.rect and self.auto)
176
+
177
+ # Stack
178
+ h, w = img.shape[:2]
179
+ shapes = (h0, w0), ((h / h0, w / w0), pad)
180
+
181
+ # Convert
182
+ #img = img[..., ::-1].transpose((0, 3, 1, 2)) # BGR to RGB, BHWC to BCHW
183
+ img = np.ascontiguousarray(img)
184
+
185
+ return self.sources, img, img0[0], None, shapes
186
+
187
+ def __len__(self):
188
+ return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years
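The streaming variant works the same way; here sketched for a single local webcam (source '0'), which is of course only runnable on a machine with a camera attached:

from lib.dataset import LoadStreams

streams = LoadStreams('0', img_size=640)          # '0' = local webcam; a streams.txt with URLs also works
for sources, img, img0, _, shapes in streams:
    print(sources, img.shape, img0.shape, shapes)
    break                                         # one frame is enough for this sketch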
lib/dataset/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .bdd import BddDataset
2
+ from .AutoDriveDataset import AutoDriveDataset
3
+ from .DemoDataset import LoadImages, LoadStreams
lib/dataset/__pycache__/AutoDriveDataset.cpython-310.pyc ADDED
Binary file (6.61 kB). View file
 
lib/dataset/__pycache__/AutoDriveDataset.cpython-37.pyc ADDED
Binary file (6.59 kB). View file
 
lib/dataset/__pycache__/DemoDataset.cpython-310.pyc ADDED
Binary file (6.55 kB). View file
 
lib/dataset/__pycache__/DemoDataset.cpython-37.pyc ADDED
Binary file (6.58 kB). View file
 
lib/dataset/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (279 Bytes). View file
 
lib/dataset/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (271 Bytes). View file
 
lib/dataset/__pycache__/bdd.cpython-310.pyc ADDED
Binary file (2.73 kB). View file
 
lib/dataset/__pycache__/bdd.cpython-37.pyc ADDED
Binary file (2.69 kB). View file
 
lib/dataset/__pycache__/convert.cpython-310.pyc ADDED
Binary file (726 Bytes). View file
 
lib/dataset/__pycache__/convert.cpython-37.pyc ADDED
Binary file (712 Bytes). View file
 
lib/dataset/bdd.py ADDED
@@ -0,0 +1,85 @@
1
+ import numpy as np
2
+ import json
3
+
4
+ from .AutoDriveDataset import AutoDriveDataset
5
+ from .convert import convert, id_dict, id_dict_single
6
+ from tqdm import tqdm
7
+
8
+ single_cls = True # just detect vehicle
9
+
10
+ class BddDataset(AutoDriveDataset):
11
+ def __init__(self, cfg, is_train, inputsize, transform=None):
12
+ super().__init__(cfg, is_train, inputsize, transform)
13
+ self.db = self._get_db()
14
+ self.cfg = cfg
15
+
16
+ def _get_db(self):
17
+ """
18
+ get database from the annotation file
19
+
20
+ Inputs:
21
+
22
+ Returns:
23
+ gt_db: (list)database [a,b,c,...]
24
+ a: (dictionary){'image':, 'information':, ......}
25
+ image: image path
26
+ mask: path of the segmentation label
27
+ label: [cls_id, center_x, center_y, w, h], normalized to the original image width/height
28
+ """
29
+ print('building database...')
30
+ gt_db = []
31
+ height, width = self.shapes
32
+ for mask in tqdm(list(self.mask_list)):
33
+ mask_path = str(mask)
34
+ label_path = mask_path.replace(str(self.mask_root), str(self.label_root)).replace(".png", ".json")
35
+ image_path = mask_path.replace(str(self.mask_root), str(self.img_root)).replace(".png", ".jpg")
36
+ lane_path = mask_path.replace(str(self.mask_root), str(self.lane_root))
37
+ with open(label_path, 'r') as f:
38
+ label = json.load(f)
39
+ data = label['frames'][0]['objects']
40
+ data = self.filter_data(data)
41
+ gt = np.zeros((len(data), 5))
42
+ for idx, obj in enumerate(data):
43
+ category = obj['category']
44
+ if category == "traffic light":
45
+ color = obj['attributes']['trafficLightColor']
46
+ category = "tl_" + color
47
+ if category in id_dict.keys():
48
+ x1 = float(obj['box2d']['x1'])
49
+ y1 = float(obj['box2d']['y1'])
50
+ x2 = float(obj['box2d']['x2'])
51
+ y2 = float(obj['box2d']['y2'])
52
+ cls_id = id_dict[category]
53
+ if single_cls:
54
+ cls_id=0
55
+ gt[idx][0] = cls_id
56
+ box = convert((width, height), (x1, x2, y1, y2))
57
+ gt[idx][1:] = list(box)
58
+
59
+
60
+ rec = [{
61
+ 'image': image_path,
62
+ 'label': gt,
63
+ 'mask': mask_path,
64
+ 'lane': lane_path
65
+ }]
66
+
67
+ gt_db += rec
68
+ print('database build finished')
69
+ return gt_db
70
+
71
+ def filter_data(self, data):
72
+ remain = []
73
+ for obj in data:
74
+ if 'box2d' in obj.keys(): # obj.has_key('box2d'):
75
+ if single_cls:
76
+ if obj['category'] in id_dict_single.keys():
77
+ remain.append(obj)
78
+ else:
79
+ remain.append(obj)
80
+ return remain
81
+
82
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
83
+ """
84
+ """
85
+ pass
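To illustrate the label conversion performed in _get_db above, here is how a single (made-up) BDD100K object dictionary becomes a gt row. id_dict and convert come from lib.dataset.convert; the 1280x720 image size is the usual BDD100K resolution (cfg.DATASET.ORG_IMG_SIZE), and with single_cls = True the class id would afterwards be forced to 0:

from lib.dataset.convert import convert, id_dict

obj = {"category": "traffic light",
       "attributes": {"trafficLightColor": "green"},
       "box2d": {"x1": 630.0, "y1": 280.0, "x2": 650.0, "y2": 320.0}}   # made-up annotation

category = obj["category"]
if category == "traffic light":
    category = "tl_" + obj["attributes"]["trafficLightColor"]           # -> "tl_green"

cls_id = id_dict[category]                                              # 7 for "tl_green"
box = convert((1280, 720), (obj["box2d"]["x1"], obj["box2d"]["x2"],
                            obj["box2d"]["y1"], obj["box2d"]["y2"]))    # normalized (x, y, w, h)
print(cls_id, box)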
lib/dataset/convert.py ADDED
@@ -0,0 +1,31 @@
1
+ # bdd_labels = {
2
+ # 'unlabeled':0, 'dynamic': 1, 'ego vehicle': 2, 'ground': 3,
3
+ # 'static': 4, 'parking': 5, 'rail track': 6, 'road': 7,
4
+ # 'sidewalk': 8, 'bridge': 9, 'building': 10, 'fence': 11,
5
+ # 'garage': 12, 'guard rail': 13, 'tunnel': 14, 'wall': 15,
6
+ # 'banner': 16, 'billboard': 17, 'lane divider': 18,'parking sign': 19,
7
+ # 'pole': 20, 'polegroup': 21, 'street light': 22, 'traffic cone': 23,
8
+ # 'traffic device': 24, 'traffic light': 25, 'traffic sign': 26, 'traffic sign frame': 27,
9
+ # 'terrain': 28, 'vegetation': 29, 'sky': 30, 'person': 31,
10
+ # 'rider': 32, 'bicycle': 33, 'bus': 34, 'car': 35,
11
+ # 'caravan': 36, 'motorcycle': 37, 'trailer': 38, 'train': 39,
12
+ # 'truck': 40
13
+ # }
14
+ id_dict = {'person': 0, 'rider': 1, 'car': 2, 'bus': 3, 'truck': 4,
15
+ 'bike': 5, 'motor': 6, 'tl_green': 7, 'tl_red': 8,
16
+ 'tl_yellow': 9, 'tl_none': 10, 'traffic sign': 11, 'train': 12}
17
+ id_dict_single = {'car': 0, 'bus': 1, 'truck': 2,'train': 3}
18
+ # id_dict = {'car': 0, 'bus': 1, 'truck': 2}
19
+
20
+ def convert(size, box):
21
+ dw = 1./(size[0])
22
+ dh = 1./(size[1])
23
+ x = (box[0] + box[1])/2.0
24
+ y = (box[2] + box[3])/2.0
25
+ w = box[1] - box[0]
26
+ h = box[3] - box[2]
27
+ x = x*dw
28
+ w = w*dw
29
+ y = y*dh
30
+ h = h*dh
31
+ return (x,y,w,h)
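For example, a 200x200-pixel box centred at (200, 300) on a 1280x720 image converts as follows:

>>> convert((1280, 720), (100.0, 300.0, 200.0, 400.0))   # box given as (x1, x2, y1, y2)
(0.15625, 0.4166666666666667, 0.15625, 0.2777777777777778)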
lib/dataset/hust.py ADDED
@@ -0,0 +1,87 @@
1
+ import numpy as np
2
+ import json
3
+
4
+ from .AutoDriveDataset import AutoDriveDataset
5
+ from .convert import convert, id_dict, id_dict_single
6
+ from tqdm import tqdm
7
+ import os
8
+
9
+ single_cls = False # detect all supported classes, not just vehicles
10
+
11
+ class HustDataset(AutoDriveDataset):
12
+ def __init__(self, cfg, is_train, inputsize, transform=None):
13
+ super().__init__(cfg, is_train, inputsize, transform)
14
+ self.db = self._get_db()
15
+ self.cfg = cfg
16
+
17
+ def _get_db(self):
18
+ """
19
+ get database from the annotation file
20
+
21
+ Inputs:
22
+
23
+ Returns:
24
+ gt_db: (list)database [a,b,c,...]
25
+ a: (dictionary){'image':, 'information':, ......}
26
+ image: image path
27
+ mask: path of the segmentation label
28
+ label: [cls_id, center_x, center_y, w, h], normalized to the original image width/height
29
+ """
30
+ print('building database...')
31
+ gt_db = []
32
+ height, width = self.shapes
33
+ for mask in tqdm(list(self.mask_list)):
34
+ mask_path = str(mask)
35
+ label_path = self.label_root
36
+ # label_path = mask_path.replace(str(self.mask_root), str(self.label_root)).replace(".png", ".json")
37
+ image_path = mask_path.replace(str(self.mask_root), str(self.img_root)).replace(".png", ".jpg")
38
+ lane_path = mask_path.replace(str(self.mask_root), str(self.lane_root))
39
+ with open(label_path, 'r') as f:
40
+ label = json.load(f)
41
+ data = label[int(os.path.basename(image_path)[:-4])]["labels"]
42
+ data = self.filter_data(data)
43
+ gt = np.zeros((len(data), 5))
44
+ for idx, obj in enumerate(data):
45
+ category = obj['category']
46
+ if category == "traffic light":
47
+ color = obj['attributes']['Traffic Light Color'][0]
48
+ category = "tl_" + color
49
+ if category in id_dict.keys():
50
+ x1 = float(obj['box2d']['x1'])
51
+ y1 = float(obj['box2d']['y1'])
52
+ x2 = float(obj['box2d']['x2'])
53
+ y2 = float(obj['box2d']['y2'])
54
+ cls_id = id_dict[category]
55
+ if single_cls:
56
+ cls_id=0
57
+ gt[idx][0] = cls_id
58
+ box = convert((width, height), (x1, x2, y1, y2))
59
+ gt[idx][1:] = list(box)
60
+
61
+
62
+ rec = [{
63
+ 'image': image_path,
64
+ 'label': gt,
65
+ 'mask': mask_path,
66
+ 'lane': lane_path
67
+ }]
68
+
69
+ gt_db += rec
70
+ print('database build finished')
71
+ return gt_db
72
+
73
+ def filter_data(self, data):
74
+ remain = []
75
+ for obj in data:
76
+ if 'box2d' in obj.keys(): # obj.has_key('box2d'):
77
+ if single_cls:
78
+ if obj['category'] in id_dict_single.keys():
79
+ remain.append(obj)
80
+ else:
81
+ remain.append(obj)
82
+ return remain
83
+
84
+ def evaluate(self, cfg, preds, output_dir, *args, **kwargs):
85
+ """
86
+ """
87
+ pass